From 7e68567e508168b345266c0c19812ad50a829eaa Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 8 Feb 2023 11:03:54 +0530 Subject: [PATCH] [downloader/hls] Allow extractors to provide AES key (#6158) and related cleanup Authored by: bashonly, Grub4K Co-authored-by: Simon Sawicki --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/downloader/external.py | 1 + yt_dlp/downloader/fragment.py | 3 ++- yt_dlp/downloader/hls.py | 45 ++++++++++++++++++++++------------- yt_dlp/extractor/common.py | 13 +++++++++- yt_dlp/extractor/vzaar.py | 2 +- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e092aed67..8f88104ef 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -554,7 +554,7 @@ class YoutubeDL: 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'preference', 'language', 'language_preference', 'quality', 'source_preference', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' } _format_selection_exts = { diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 3917af448..5f54017a8 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -104,6 +104,7 @@ class ExternalFD(FragmentFD): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 83f7870ed..02f8559cc 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -360,7 +360,8 @@ class FragmentFD(FileDownloader): if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) - decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + decrypt_info['KEY'] = (decrypt_info.get('KEY') + or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI'])) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index ae18ac419..29d6f6241 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -8,7 +8,14 @@ from .external import FFmpegFD from .fragment import FragmentFD from .. import webvtt from ..dependencies import Cryptodome -from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query +from ..utils import ( + bug_reports_message, + parse_m3u8_attributes, + remove_start, + traverse_obj, + update_url_query, + urljoin, +) class HlsFD(FragmentFD): @@ -150,6 +157,13 @@ class HlsFD(FragmentFD): i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} + external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key')) + if external_aes_key: + external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x')) + assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' + external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv')) + if external_aes_iv: + external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) byte_range = {} discontinuity_count = 0 frag_index = 0 @@ -165,10 +179,7 @@ class HlsFD(FragmentFD): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - frag_url = ( - line - if re.match(r'^https?://', line) - else urllib.parse.urljoin(man_url, line)) + frag_url = urljoin(man_url, line) if extra_query: frag_url = update_url_query(frag_url, extra_query) @@ -190,10 +201,7 @@ class HlsFD(FragmentFD): return False frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) - frag_url = ( - map_info.get('URI') - if re.match(r'^https?://', map_info.get('URI')) - else urllib.parse.urljoin(man_url, map_info.get('URI'))) + frag_url = urljoin(man_url, map_info.get('URI')) if extra_query: frag_url = update_url_query(frag_url, extra_query) @@ -218,15 +226,18 @@ class HlsFD(FragmentFD): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': - if 'IV' in decrypt_info: + if external_aes_iv: + decrypt_info['IV'] = external_aes_iv + elif 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) - if not re.match(r'^https?://', decrypt_info['URI']): - decrypt_info['URI'] = urllib.parse.urljoin( - man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) - if decrypt_url != decrypt_info['URI']: - decrypt_info['KEY'] = None + if external_aes_key: + decrypt_info['KEY'] = external_aes_key + else: + decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f80536470..09b03e69a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -81,8 +81,8 @@ from ..utils import ( update_Request, update_url_query, url_basename, - urlhandle_detect_ext, url_or_none, + urlhandle_detect_ext, urljoin, variadic, xpath_element, @@ -220,6 +220,17 @@ class InfoExtractor: * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. Boolean + * extra_param_to_segment_url A query string to append to each + fragment's URL, or to update each existing query string + with. Only applied by the native HLS/DASH downloaders. + * hls_aes A dictionary of HLS AES-128 decryption information + used by the native HLS downloader to override the + values in the media playlist when an '#EXT-X-KEY' tag + is present in the playlist: + * uri The URI from which the key will be downloaded + * key The key (as hex) used to decrypt fragments. + If `key` is given, any key URI will be ignored + * iv The IV (as hex) used to decrypt fragments * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py index 6b9817c9e..19908a929 100644 --- a/yt_dlp/extractor/vzaar.py +++ b/yt_dlp/extractor/vzaar.py @@ -87,7 +87,7 @@ class VzaarIE(InfoExtractor): m3u8_id='hls', fatal=False) if hls_aes: for f in m3u8_formats: - f['_decryption_key_url'] = url_templ % ('goose', '') + qs + f['hls_aes'] = {'uri': url_templ % ('goose', '') + qs} formats.extend(m3u8_formats) return {