[downloader/aria2c] Native progress for aria2c via RPC (#3724)

Authored by: Lesmiscore, pukkandan

Closes #2038
This commit is contained in:
Lesmiscore 2023-01-02 02:16:25 +09:00 committed by GitHub
parent 193fb150b7
commit 8c53322cda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 8 deletions

View file

@ -153,6 +153,7 @@ ### Differences in default behavior
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: `aria2c`). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
For ease of use, a few more compat options are available: For ease of use, a few more compat options are available:
@ -160,7 +161,7 @@ ### Differences in default behavior
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options * `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options
# INSTALLATION # INSTALLATION

View file

@ -1,9 +1,11 @@
import enum import enum
import json
import os.path import os.path
import re import re
import subprocess import subprocess
import sys import sys
import time import time
import uuid
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import functools from ..compat import functools
@ -20,8 +22,10 @@
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename, encodeFilename,
find_available_port,
handle_youtubedl_headers, handle_youtubedl_headers,
remove_end, remove_end,
sanitized_Request,
traverse_obj, traverse_obj,
) )
@ -60,7 +64,6 @@ def real_download(self, filename, info_dict):
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -129,8 +132,7 @@ def _call_downloader(self, tmpfilename, info_dict):
self._debug_cmd(cmd) self._debug_cmd(cmd)
if 'fragments' not in info_dict: if 'fragments' not in info_dict:
_, stderr, returncode = Popen.run( _, stderr, returncode = self._call_process(cmd, info_dict)
cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
if returncode and stderr: if returncode and stderr:
self.to_stderr(stderr) self.to_stderr(stderr)
return returncode return returncode
@ -140,7 +142,7 @@ def _call_downloader(self, tmpfilename, info_dict):
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=None, fatal=not skip_unavailable_fragments) frag_index=None, fatal=not skip_unavailable_fragments)
for retry in retry_manager: for retry in retry_manager:
_, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) _, stderr, returncode = self._call_process(cmd, info_dict)
if not returncode: if not returncode:
break break
# TODO: Decide whether to retry based on error code # TODO: Decide whether to retry based on error code
@ -172,6 +174,9 @@ def _call_downloader(self, tmpfilename, info_dict):
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
return 0 return 0
def _call_process(self, cmd, info_dict):
return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
class CurlFD(ExternalFD): class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V' AVAILABLE_OPT = '-V'
@ -256,6 +261,14 @@ def supports_manifest(manifest):
def _aria2c_filename(fn): def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
def _call_downloader(self, tmpfilename, info_dict):
if 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
info_dict['__rpc'] = {
'port': find_available_port() or 19190,
'secret': str(uuid.uuid4()),
}
return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c', cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@ -276,6 +289,12 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args() cmd += self._configuration_args()
if '__rpc' in info_dict:
cmd += [
'--enable-rpc',
f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
f'--rpc-secret={info_dict["__rpc"]["secret"]}']
# aria2c strips out spaces from the beginning/end of filenames and paths. # aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the # We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path. # filename and relative path, and adding a "/" at the end of the path.
@ -304,6 +323,88 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
# Does not actually need to be UUID, just unique
sanitycheck = str(uuid.uuid4())
d = json.dumps({
'jsonrpc': '2.0',
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
request = sanitized_Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
'Content-Type': 'application/json',
'Content-Length': f'{len(d)}',
'Ytdl-request-proxy': '__noproxy__',
})
with self.ydl.urlopen(request) as r:
resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
return resp['result']
def _call_process(self, cmd, info_dict):
if '__rpc' not in info_dict:
return super()._call_process(cmd, info_dict)
send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
started = time.time()
fragmented = 'fragments' in info_dict
frag_count = len(info_dict['fragments']) if fragmented else 1
status = {
'filename': info_dict.get('_filename'),
'status': 'downloading',
'elapsed': 0,
'downloaded_bytes': 0,
'fragment_count': frag_count if fragmented else None,
'fragment_index': 0 if fragmented else None,
}
self._hook_progress(status, info_dict)
def get_stat(key, *obj, average=False):
val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
return sum(val) / (len(val) if average else 1)
with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
# Add a small sleep so that RPC client can receive response,
# or the connection stalls infinitely
time.sleep(0.2)
retval = p.poll()
while retval is None:
# We don't use tellStatus as we won't know the GID without reading stdout
# Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
active = send_rpc('aria2.tellActive')
completed = send_rpc('aria2.tellStopped', [0, frag_count])
downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
speed = get_stat('downloadSpeed', active)
total = frag_count * get_stat('totalLength', active, completed, average=True)
if total < downloaded:
total = None
status.update({
'downloaded_bytes': int(downloaded),
'speed': speed,
'total_bytes': None if fragmented else total,
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started
})
self._hook_progress(status, info_dict)
if not active and len(completed) >= frag_count:
send_rpc('aria2.shutdown')
retval = p.wait()
break
time.sleep(0.1)
retval = p.poll()
return '', p.stderr.read(), retval
class HttpieFD(ExternalFD): class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version' AVAILABLE_OPT = '--version'

View file

@ -464,14 +464,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'allowed_values': { 'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': { }, 'aliases': {
'youtube-dl': ['all', '-multistreams'], 'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': [], '2022': ['no-external-downloader-progress'],
} }
}, help=( }, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'Options that can help keep compatibility with youtube-dl or youtube-dlc '

View file

@ -5243,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field):
} }
def find_available_port(interface=''):
try:
with socket.socket() as sock:
sock.bind((interface, 0))
return sock.getsockname()[1]
except OSError:
return None
# Templates for internet shortcut files, which are plain text files. # Templates for internet shortcut files, which are plain text files.
DOT_URL_LINK_TEMPLATE = '''\ DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut] [InternetShortcut]