From 9d6254069c75877bc88bc3584f4326fb1853a543 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 20 Sep 2023 19:14:10 +0000 Subject: [PATCH 01/41] Update to ytdl-commit-66ab08 (#8128) [utils] Revert bbd3e7e, updating docstring, test instead https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9 Authored by: coletdjnz --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7b73f4fd..d94d8ea82 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API From 35f9a306e6934793cff100200cd03f288ec33f11 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 21 Sep 2023 10:58:53 -0500 Subject: [PATCH 02/41] [dependencies] Handle deprecation of `sqlite3.version` (#8167) Closes #8152 Authored by: bashonly --- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/dependencies/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 3ca46d270..d62b7d048 100644 --- a/yt_dlp/compat/compat_utils.py +++ 
b/yt_dlp/compat/compat_utils.py @@ -15,7 +15,7 @@ def get_package_info(module): name=getattr(module, '_yt_dlp__identifier', module.__name__), version=str(next(filter(None, ( getattr(module, attr, None) - for attr in ('__version__', 'version_string', 'version') + for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') )), None))) diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index 6e7d29c5c..b56e4f5cc 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -43,6 +43,8 @@ except Exception as _err: try: import sqlite3 + # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 + sqlite3._yt_dlp__version = sqlite3.sqlite_version except ImportError: # although sqlite3 is part of the standard library, it is possible to compile python without # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 From 295fbb3ae3a7d0dd50e286be5c487cf145ed5778 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 01:28:20 +0800 Subject: [PATCH 03/41] [ie/eplus:inbound] Add extractor (#5782) Authored by: pzhlkj6612 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/eplus.py | 96 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 yt_dlp/extractor/eplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 490b010b8..3ce6baef2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -565,6 +565,7 @@ from .epicon import ( EpiconIE, EpiconSeriesIE, ) +from .eplus import EplusIbIE from .epoch import EpochIE from .eporner import EpornerIE from .eroprofile import ( diff --git a/yt_dlp/extractor/eplus.py b/yt_dlp/extractor/eplus.py new file mode 100644 index 000000000..3ebdcf5fb --- /dev/null +++ b/yt_dlp/extractor/eplus.py @@ -0,0 +1,96 @@ +from .common import InfoExtractor +from ..utils import ( + 
ExtractorError, + try_call, + unified_timestamp, +) + + +class EplusIbIE(InfoExtractor): + IE_NAME = 'eplus:inbound' + IE_DESC = 'e+ (イープラス) overseas' + _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P(?:\w|%2B|%2F){86}%3D%3D)' + _TESTS = [{ + 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D', + 'info_dict': { + 'id': '354502-0001-002', + 'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】', + 'live_status': 'was_live', + 'release_date': '20211231', + 'release_timestamp': 1640952000, + 'description': str, + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Could not find the playlist URL. This event may not be accessible', + 'No video formats found!', + 'Requested format is not available', + ], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data_json = self._search_json(r'', webpage, 'JS sign func', fatal=fatal) + + +class DouyuTVIE(DouyuBaseIE): + IE_DESC = '斗鱼直播' _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P[A-Za-z0-9]+)' _TESTS = [{ - 'url': 'http://www.douyutv.com/iseven', + 'url': 'https://www.douyu.com/pigff', 'info_dict': { - 'id': '17732', - 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.png', - 'uploader': '7师傅', + 'id': '24422', + 'display_id': 'pigff', + 'ext': 'mp4', + 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群', + 'thumbnail': str, + 'uploader': 'pigff', 'is_live': True, + 'live_status': 'is_live', }, 'params': { 'skip_download': True, @@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] + def _get_sign_func(self, room_id, video_id): + return self._download_json( + f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id, + note='Getting signing script')['data'][f'room{room_id}'] + + def _extract_stream_formats(self, stream_formats): + formats = [] + for stream_info in traverse_obj(stream_formats, (..., 'data')): + stream_url = urljoin( + traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live')) + if stream_url: + rate_id = traverse_obj(stream_info, ('rate', {int_or_none})) + rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False) + ext = determine_ext(stream_url) + formats.append({ + 'url': stream_url, + 'format_id': str_or_none(rate_id), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + 'quality': rate_id % -10000 if rate_id is not None else None, + **traverse_obj(rate_info, { + 'format': ('name', {str_or_none}), + 'tbr': ('bit', {int_or_none}), + }), + }) + return formats + def _real_extract(self, url): video_id = self._match_id(url) - if video_id.isdigit(): - room_id = video_id - else: - page = self._download_webpage(url, video_id) - room_id = self._html_search_regex( - r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') + webpage = self._download_webpage(url, video_id) + room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id') + + if 
self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1': + raise UserNotLive('The channel is auto-playing VODs', video_id=video_id) + if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2': + raise UserNotLive(video_id=video_id) # Grab metadata from API params = { @@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor): 'time': int(time.time()), } params['auth'] = hashlib.md5( - f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() - room = self._download_json( + f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() + room = traverse_obj(self._download_json( f'http://www.douyutv.com/api/v1/room/{room_id}', video_id, - note='Downloading room info', query=params)['data'] + note='Downloading room info', query=params, fatal=False), 'data') # 1 = live, 2 = offline - if room.get('show_status') == '2': - raise ExtractorError('Live stream is offline', expected=True) + if traverse_obj(room, 'show_status') == '2': + raise UserNotLive(video_id=video_id) - video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL')) - formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id) + js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id) + form_data = { + 'rate': 0, + **self._calc_sign(js_sign_func, video_id, room_id), + } + stream_formats = [self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note="Downloading livestream format", + data=urlencode_postdata(form_data))] - title = unescapeHTML(room['room_name']) - description = room.get('show_details') - thumbnail = room.get('room_src') - uploader = room.get('nickname') + for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')): + if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')): + 
form_data['rate'] = rate_id + stream_formats.append(self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note=f'Downloading livestream format {rate_id}', + data=urlencode_postdata(form_data))) return { 'id': room_id, - 'display_id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, + 'formats': self._extract_stream_formats(stream_formats), 'is_live': True, - 'subtitles': subs, - 'formats': formats, + **traverse_obj(room, { + 'display_id': ('url', {str}, {lambda i: i[1:]}), + 'title': ('room_name', {unescapeHTML}), + 'description': ('show_details', {str}), + 'uploader': ('nickname', {str}), + 'thumbnail': ('room_src', {url_or_none}), + }) } -class DouyuShowIE(InfoExtractor): +class DouyuShowIE(DouyuBaseIE): _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P[0-9a-zA-Z]+)' _TESTS = [{ - 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', - 'md5': '0c2cfd068ee2afe657801269b2d86214', + 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY', 'info_dict': { - 'id': 'rjNBdvnVXNzvE2yw', + 'id': 'mPyq7oVNe5Yv1gLY', 'ext': 'mp4', - 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', - 'duration': 7150.08, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '陈一发儿', - 'uploader_id': 'XrZwYelr5wbK', - 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', - 'upload_date': '20170402', + 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃', + 'duration': 633, + 'thumbnail': str, + 'uploader': '美食作家王刚V', + 'uploader_id': 'OVAO4NVx1m7Q', + 'timestamp': 1661850002, + 'upload_date': '20220830', + 'view_count': int, + 'tags': ['美食', '美食综合'], }, }, { 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', 'only_matching': True, }] + _FORMATS = { + 'super': '原画', + 'high': '超清', + 'normal': '高清', + } + + _QUALITIES = { + 'super': -1, + 'high': -2, + 'normal': -3, + } + + _RESOLUTIONS = { + 'super': '1920x1080', + 'high': '1280x720', + 'normal': '852x480', + } + def _real_extract(self, url): url = 
url.replace('vmobile.', 'v.') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - room_info = self._parse_json(self._search_regex( - r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) + video_info = self._search_json( + r'') + if player: + for src in traverse_obj(player, ('lib', 'source', 'sources', ...)): + process_format_list(src) + duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none})) + if not formats and not has_drm: + # older code path, in use before August 2023 + player = self._parse_json( + self._search_regex( + (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P{.*?})\s*\)(?:\s*\))?\s*,', + r'Player\.init\s*\([^,]+,(?P\s*\w+\s*\?)?\s*(?P{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'), + webpage, 'player', group='json'), video_id) + if player: + for format_id, format_list in player['tracks'].items(): + process_format_list(format_list, format_id) + duration = int_or_none(player.get('duration')) if not formats and has_drm: self.report_drm(video_id) From 52414d64ca7b92d3f83964cdd68247989b0c4625 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 21 Sep 2023 16:51:57 -0500 Subject: [PATCH 10/41] [utils] `js_to_json`: Handle `Array` objects Authored by: Grub4K, std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com> Co-authored-by: Simon Sawicki --- test/test_utils.py | 6 ++++++ yt_dlp/utils/_utils.py | 1 + 2 files changed, 7 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 91e3ffd39..47d1f71bf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1218,6 +1218,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}`', {}), '"name"') + def test_js_to_json_map_array_constructors(self): + self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) + 
self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) + self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) + self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index ef26de116..213ccc636 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2727,6 +2727,7 @@ def js_to_json(code, vars={}, *, strict=False): def create_map(mobj): return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) + code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code) code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) if not strict: code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) From 904a19ee93195ce0bd4b08bd22b186120afb5b17 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 21 Sep 2023 16:54:57 -0500 Subject: [PATCH 11/41] [ie] Make `_search_nuxt_data` more lenient Authored by: std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com> --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 7deab995c..c94b4abdc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1687,7 +1687,7 @@ class InfoExtractor: def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. 
This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) - FUNCTION_RE = r'\(function\((?P.*?)\){return\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' + FUNCTION_RE = r'\(function\((?P.*?)\){(?:.*?)return\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' js, arg_keys, arg_vals = self._search_regex( (rf'', rf'{rectx}\(.*?{FUNCTION_RE}'), webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), From 568f08051841aedea968258889539741e26009e9 Mon Sep 17 00:00:00 2001 From: std-move <26625259+std-move@users.noreply.github.com> Date: Fri, 22 Sep 2023 00:20:52 +0200 Subject: [PATCH 12/41] [ie/iprima] Fix extractor (#7216) Closes #7229 Authored by: std-move --- yt_dlp/extractor/iprima.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 6dec1510d..f7aa579b3 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor): ), webpage, 'real id', group='id', default=None) if not video_id: - nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data') + nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False) video_id = traverse_obj( nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False) + if not video_id: + nuxt_data = self._search_json( + r']+\bid=["\']__NUXT_DATA__["\'][^>]*>', + webpage, 'nuxt data', None, end_pattern=r'', contains_pattern=r'\[(?s:.+)\]') + + video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False) + if not video_id: self.raise_no_formats('Unable to extract video ID from webpage') From 661c9a1d029296b28e0b2f8be8a72a43abaf6536 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 21 Sep 2023 17:48:57 -0500 Subject: [PATCH 13/41] [test:download] Test for `expected_exception` Authored by: at-wat Co-authored-by: Atsushi Watanabe --- test/test_download.py | 20 ++++++++++++++++++++ 1 file changed, 20 
insertions(+) diff --git a/test/test_download.py b/test/test_download.py index 6f00a4ded..253079249 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -31,6 +31,7 @@ from yt_dlp.utils import ( DownloadError, ExtractorError, UnavailableVideoError, + YoutubeDLError, format_bytes, join_nonempty, ) @@ -100,6 +101,8 @@ def generator(test_case, tname): print_skipping('IE marked as not _WORKING') for tc in test_cases: + if tc.get('expected_exception'): + continue info_dict = tc.get('info_dict', {}) params = tc.get('params', {}) if not info_dict.get('id'): @@ -139,6 +142,17 @@ def generator(test_case, tname): res_dict = None + def match_exception(err): + expected_exception = test_case.get('expected_exception') + if not expected_exception: + return False + if err.__class__.__name__ == expected_exception: + return True + for exc in err.exc_info: + if exc.__class__.__name__ == expected_exception: + return True + return False + def try_rm_tcs_files(tcs=None): if tcs is None: tcs = test_cases @@ -161,6 +175,8 @@ def generator(test_case, tname): except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503): + if match_exception(err): + return err.msg = f'{getattr(err, "msg", err)} ({tname})' raise @@ -171,6 +187,10 @@ def generator(test_case, tname): print(f'Retrying: {try_num} failed tries\n\n##########\n\n') try_num += 1 + except YoutubeDLError as err: + if match_exception(err): + return + raise else: break From c1d71d0d9f41db5e4306c86af232f5f6220a130b Mon Sep 17 00:00:00 2001 From: Atsushi Watanabe Date: Fri, 22 Sep 2023 08:04:05 +0900 Subject: [PATCH 14/41] [ie/twitcasting] Support `--wait-for-video` (#7975) Authored by: at-wat --- yt_dlp/extractor/twitcasting.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/twitcasting.py 
b/yt_dlp/extractor/twitcasting.py index 3890d5d8f..540e217fd 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from ..dependencies import websockets from ..utils import ( - clean_html, ExtractorError, + UserNotLive, + clean_html, float_or_none, get_element_by_class, get_element_by_id, @@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor): _TESTS = [{ 'url': 'https://twitcasting.tv/ivetesangalo', 'only_matching': True, + }, { + 'url': 'https://twitcasting.tv/c:unusedlive', + 'expected_exception': 'UserNotLive', }] def _real_extract(self, url): @@ -260,7 +264,7 @@ class TwitCastingLiveIE(InfoExtractor): r'(?s)\d+)"\s*>.+?', webpage, 'current live ID 2', default=None, group='video_id') if not current_live: - raise ExtractorError('The user is not currently live') + raise UserNotLive(video_id=uploader_id) return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live)) From c2da0b5ea215298135f76e3dc14b972a3c4afacb Mon Sep 17 00:00:00 2001 From: bashonly Date: Sat, 23 Sep 2023 14:54:00 -0500 Subject: [PATCH 15/41] [ie/ArteTV] Fix HLS formats extraction Closes #8156 Authored by: bashonly --- yt_dlp/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index e3cc5afb0..a19cd2a3a 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE): ))) short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') - if stream['protocol'].startswith('HLS'): + if 'HLS' in stream['protocol']: fmts, subs = self._extract_m3u8_formats_and_subtitles( stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) for fmt in fmts: From 5ca095cbcde3e32642a4fe5b2d69e8e3c785a021 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 23 Sep 2023 
15:00:31 -0500 Subject: [PATCH 16/41] [cleanup] Misc (#8182) Closes #7796, Closes #8028 Authored by: barsnick, sqrtNOT, gamer191, coletdjnz, Grub4K, bashonly --- CONTRIBUTING.md | 8 ++++---- README.md | 2 +- devscripts/make_changelog.py | 2 +- test/test_YoutubeDL.py | 1 - test/test_networking_utils.py | 6 +++--- yt_dlp/YoutubeDL.py | 6 +++--- yt_dlp/compat/urllib/__init__.py | 2 +- yt_dlp/extractor/abc.py | 1 - yt_dlp/extractor/ign.py | 4 ---- yt_dlp/extractor/nebula.py | 1 - yt_dlp/extractor/peekvids.py | 1 - yt_dlp/extractor/radiofrance.py | 2 +- yt_dlp/extractor/rcs.py | 6 +++--- yt_dlp/extractor/rokfin.py | 1 - yt_dlp/extractor/s4c.py | 2 -- yt_dlp/extractor/sovietscloset.py | 1 - yt_dlp/extractor/youtube.py | 2 +- yt_dlp/networking/__init__.py | 2 +- yt_dlp/networking/_urllib.py | 2 +- yt_dlp/networking/exceptions.py | 4 ++-- 20 files changed, 22 insertions(+), 34 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8587fe92..90e7faf7c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -217,7 +217,7 @@ After you have ensured this site is distributing its content legally, you can fo 1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` 1. Make sure you have atleast one test for your extractor. 
Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. -1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. +1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 yt_dlp/extractor/yourextractor.py @@ -251,7 +251,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: +For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. 
Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - `title` (media title) @@ -696,7 +696,7 @@ formats = [ ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -704,7 +704,7 @@ Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions. +Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions. #### Examples diff --git a/README.md b/README.md index d94d8ea82..d9b11952d 100644 --- a/README.md +++ b/README.md @@ -1800,7 +1800,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. 
See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. 
Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index ac68dcd19..9ff65db14 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -260,7 +260,7 @@ class CommitRange: AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) MESSAGE_RE = re.compile(r''' (?:\[(?P[^\]]+)\]\ )? - (?:(?P`?[^:`]+`?): )? + (?:(?P`?[\w.-]+`?): )? (?P.+?) (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? ''', re.VERBOSE | re.DOTALL) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 3cfb61fb2..916ee48b9 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -631,7 +631,6 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(test_dict['playlist'], 'funny videos') outtmpl_info = { - 'id': '1234', 'id': '1234', 'ext': 'mp4', 'width': None, diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index dbf656090..419aae1e4 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -269,14 +269,14 @@ class TestNetworkingExceptions: assert not response.closed def test_incomplete_read_error(self): - error = IncompleteRead(b'test', 3, cause='test') + error = IncompleteRead(4, 3, cause='test') assert isinstance(error, IncompleteRead) assert repr(error) == '' assert str(error) == error.msg == '4 bytes read, 3 more expected' - assert error.partial == b'test' + assert error.partial == 4 assert error.expected == 3 assert error.cause == 'test' - error = IncompleteRead(b'aaa') + error = IncompleteRead(3) assert repr(error) == '' assert str(error) == '3 bytes read' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1feed3052..39aaf2c2e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -239,9 +239,9 @@ class YoutubeDL: 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output 
paths. The allowed keys are 'home' - 'temp' and the keys of OUTTMPL_TYPES (in utils.py) + 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py) outtmpl: Dictionary of templates for output names. Allowed keys - are 'default' and the keys of OUTTMPL_TYPES (in utils.py). + are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py). For compatibility with youtube-dl, a single string can also be used outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names @@ -422,7 +422,7 @@ class YoutubeDL: asked whether to download the video. - Raise utils.DownloadCancelled(msg) to abort remaining downloads when a video is rejected. - match_filter_func in utils.py is one example for this. + match_filter_func in utils/_utils.py is one example for this. color: A Dictionary with output stream names as keys and their respective color policy as values. Can also just be a single color policy, diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py index b27cc6133..9084b3c2b 100644 --- a/yt_dlp/compat/urllib/__init__.py +++ b/yt_dlp/compat/urllib/__init__.py @@ -1,7 +1,7 @@ # flake8: noqa: F405 from urllib import * # noqa: F403 -del request +del request # noqa: F821 from . import request # noqa: F401 from ..compat_utils import passthrough_module diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index f56133eb3..d2cf5f7c5 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -180,7 +180,6 @@ class ABCIViewIE(InfoExtractor): _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P[^/?#]+)' _GEO_COUNTRIES = ['AU'] - # ABC iview programs are normally available for 14 days only. 
_TESTS = [{ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index 64875f8ce..1c4f105e9 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE): 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', 'duration': 298, 'tags': 'count:13', - 'display_id': '112203', - 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', - 'duration': 298, - 'tags': 'count:13', }, 'expected_warnings': ['HTTP Error 400: Bad Request'], }, { diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 4f3e691b7..8fba2bcf7 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -127,7 +127,6 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', 'uploader_id': 'lindsayellis', - 'timestamp': 1533009600, 'uploader_url': 'https://nebula.tv/lindsayellis', 'series': 'Lindsay Ellis', 'display_id': 'that-time-disney-remade-beauty-and-the-beast', diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index d1fc058b9..41f591b09 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE): 'uploader': 'Brazzers', 'age_limit': 18, 'view_count': int, - 'age_limit': 18, 'categories': list, 'tags': list, }, diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 35f4b91dd..ec1b97631 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -82,7 +82,7 @@ class RadioFranceBaseIE(InfoExtractor): def _extract_data_from_webpage(self, webpage, display_id, key): return traverse_obj(self._search_json( r'\bconst\s+data\s*=', webpage, key, display_id, - 
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json), + contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json), (..., 'data', key, {dict}), get_all=False) or {} diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index 028d3d90b..b865f63fb 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -239,10 +239,10 @@ class RCSEmbedsIE(RCSBaseIE): } }, { 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', - 'match_only': True + 'only_matching': True }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', - 'match_only': True + 'only_matching': True }] _WEBPAGE_TESTS = [{ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', @@ -325,7 +325,7 @@ class RCSIE(RCSBaseIE): } }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', - 'match_only': True + 'only_matching': True }] diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 4a4d40bef..cad76f0c9 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -40,7 +40,6 @@ class RokfinIE(InfoExtractor): 'channel': 'Jimmy Dore', 'channel_id': 65429, 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', - 'duration': 213.0, 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 990ea2b44..67eff723b 100644 --- a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -78,7 +78,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '864982911', 'title': 'Iaith ar Daith', - 'description': 'md5:e878ebf660dce89bd2ef521d7ce06397' }, }, { 'url': 'https://www.s4c.cymru/clic/series/866852587', @@ -86,7 +85,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '866852587', 'title': 'FFIT Cymru', - 'description': 
'md5:abcb3c129cb68dbb6cd304fd33b07e96' }, }] diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 453016ccb..493eea2a6 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -76,7 +76,6 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'title': 'Arma 3 - Zeus Games #5', 'uploader': 'SovietWomble', 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$', - 'uploader': 'SovietWomble', 'creator': 'SovietWomble', 'release_timestamp': 1461157200, 'release_date': '20160420', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 023d8fd8c..a39d17cf1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -902,7 +902,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ - # XXX: this could be moved to a general function in utils.py + # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. 
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py index 5e8876484..5b1599a6d 100644 --- a/yt_dlp/networking/__init__.py +++ b/yt_dlp/networking/__init__.py @@ -1,4 +1,4 @@ -# flake8: noqa: 401 +# flake8: noqa: F401 from .common import ( HEADRequest, PUTRequest, diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index c327f7744..9e2bf33e4 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -337,7 +337,7 @@ def handle_sslerror(e: ssl.SSLError): def handle_response_read_exceptions(e): if isinstance(e, http.client.IncompleteRead): - raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e + raise IncompleteRead(partial=len(e.partial), cause=e, expected=e.expected) from e elif isinstance(e, ssl.SSLError): handle_sslerror(e) elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)): diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py index 465b18ba9..f58dc246e 100644 --- a/yt_dlp/networking/exceptions.py +++ b/yt_dlp/networking/exceptions.py @@ -75,10 +75,10 @@ class HTTPError(RequestError): class IncompleteRead(TransportError): - def __init__(self, partial, expected=None, **kwargs): + def __init__(self, partial: int, expected: int = None, **kwargs): self.partial = partial self.expected = expected - msg = f'{len(partial)} bytes read' + msg = f'{partial} bytes read' if expected is not None: msg += f', {expected} more expected' From eaee21bf71889d495076037cbe590c8c0b21ef3a Mon Sep 17 00:00:00 2001 From: garret Date: Sat, 23 Sep 2023 23:13:48 +0100 Subject: [PATCH 17/41] [ie/Monstercat] Add extractor (#8133) Closes #8067 Authored by: garret1317 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/monstercat.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 
yt_dlp/extractor/monstercat.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9cda06d8f..691cac339 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1126,6 +1126,7 @@ from .mofosex import ( MofosexEmbedIE, ) from .mojvideo import MojvideoIE +from .monstercat import MonstercatIE from .morningstar import MorningstarIE from .motherless import ( MotherlessIE, diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py new file mode 100644 index 000000000..7f04825fc --- /dev/null +++ b/yt_dlp/extractor/monstercat.py @@ -0,0 +1,79 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + get_element_text_and_html_by_tag, + int_or_none, + unified_strdate, + strip_or_none, + traverse_obj, + try_call, +) + + +class MonstercatIE(InfoExtractor): + _VALID_URL = r'https://www\.monstercat\.com/release/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.monstercat.com/release/742779548009', + 'playlist_count': 20, + 'info_dict': { + 'title': 'The Secret Language of Trees', + 'id': '742779548009', + 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', + 'release_year': 2023, + 'release_date': '20230711', + 'album': 'The Secret Language of Trees', + 'album_artist': 'BT', + } + }] + + def _extract_tracks(self, table, album_meta): + for td in re.findall(r'((?:(?!)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag + title = clean_html(try_call( + lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' Date: Sun, 24 Sep 2023 06:15:01 +0800 Subject: [PATCH 18/41] [ie/PIAULIZAPortal] Add extractor (#7903) Authored by: pzhlkj6612 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/piaulizaportal.py | 70 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 yt_dlp/extractor/piaulizaportal.py diff --git 
a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 691cac339..49c35cf71 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1452,6 +1452,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .piapro import PiaproIE +from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py new file mode 100644 index 000000000..1eb6d92b7 --- /dev/null +++ b/yt_dlp/extractor/piaulizaportal.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, + time_seconds, + traverse_obj, +) + + +class PIAULIZAPortalIE(InfoExtractor): + IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM' + _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _TESTS = [{ + 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44', + 'info_dict': { + 'id': '005f18b7-e810-5618-cb82-0987c5755d44', + 'title': 'プレゼンテーションプレイヤーのサンプル', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1', + 'info_dict': { + 'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d', + 'title': '【確認用】視聴サンプルページ(ULIZA)', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0))) + if expires and expires <= time_seconds(): + raise ExtractorError('The link is expired.', video_id=video_id, expected=True) + + webpage = 
self._download_webpage(url, video_id) + + player_data = self._download_webpage( + self._search_regex( + r'' _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + _CLIENT_DATA = { + 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', + 'clientSecret': 'CZuvCL49d9OwfGsR', + 'deviceId': str(uuid.uuid4()), + 'deviceInfo': base64.b64encode(json.dumps({ + 'model': 'desktop', + 'version': 'Chrome', + 'osName': 'Windows', + 'osVersion': '10.0', + }, separators=(',', ':')).encode()).decode(), + 'networkType': 'other', + 'nflClaimGroupsToAdd': [], + 'nflClaimGroupsToRemove': [], + } + _ACCOUNT_INFO = {} + _API_KEY = None + + _TOKEN = None + _TOKEN_EXPIRY = 0 + + def _get_account_info(self, url, slug): + if not self._API_KEY: + webpage = self._download_webpage(url, slug, fatal=False) or '' + self._API_KEY = self._search_regex( + r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key', + fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' + + cookies = self._get_cookies('https://auth-id.nfl.com/') + login_token = traverse_obj(cookies, ( + (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) + if not login_token: + self.raise_login_required() + if 'ucid' not in cookies: + raise ExtractorError( + 'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. 
' + 'If using --cookies, these cookies must be exported along with .nfl.com cookies, ' + 'or else try using --cookies-from-browser instead', expected=True) + + account = self._download_json( + 'https://auth-id.nfl.com/accounts.getAccountInfo', slug, + note='Downloading account info', data=urlencode_postdata({ + 'include': 'profile,data', + 'lang': 'en', + 'APIKey': self._API_KEY, + 'sdk': 'js_latest', + 'login_token': login_token, + 'authMode': 'cookie', + 'pageURL': url, + 'sdkBuild': traverse_obj(cookies, ( + 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), + 'format': 'json', + }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + self._ACCOUNT_INFO = traverse_obj(account, { + 'signatureTimestamp': 'signatureTimestamp', + 'uid': 'UID', + 'uidSignature': 'UIDSignature', + }) + + if len(self._ACCOUNT_INFO) != 3: + raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) + + def _get_auth_token(self, url, slug): + if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): + return + + if not self._ACCOUNT_INFO: + self._get_account_info(url, slug) + + token = self._download_json( + 'https://api.nfl.com/identity/v3/token%s' % ( + '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), + slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', + data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) + + self._TOKEN = token['accessToken'] + self._TOKEN_EXPIRY = token['expiresIn'] + self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] + def _parse_video_config(self, video_config, display_id): video_config = self._parse_json(video_config, display_id) item = video_config['playlist'][0] @@ -168,7 +247,7 @@ class NFLArticleIE(NFLBaseIE): class NFLPlusReplayIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:replay' - _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/[\w-]+/(?P\d+)' + _VALID_URL = 
r'https?://(?:www\.)?nfl.com/plus/games/(?P[\w-]+)(?:/(?P\d+))?' _TESTS = [{ 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108', 'info_dict': { @@ -185,23 +264,92 @@ class NFLPlusReplayIE(NFLBaseIE): 'thumbnail': r're:^https?://.*\.jpg', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1', + 'playlist_count': 4, + 'info_dict': { + 'id': 'giants-at-vikings-2022-post-1', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'playlist_count': 2, + 'info_dict': { + 'id': 'giants-at-patriots-2011-pre-4', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'info_dict': { + 'id': '950701', + 'ext': 'mp4', + 'title': 'Giants @ Patriots', + 'description': 'Giants at Patriots on September 01, 2011', + 'uploader': 'NFL', + 'upload_date': '20210724', + 'timestamp': 1627085874, + 'duration': 1532, + 'categories': ['Game Highlights'], + 'tags': ['play-by-play'], + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + 'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}}, + }, }] + _REPLAY_TYPES = { + 'full_game': 'Full Game', + 'full_game_spanish': 'Full Game - Spanish', + 'condensed_game': 'Condensed Game', + 'all_22': 'All-22', + } + def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + slug, video_id = self._match_valid_url(url).group('slug', 'id') + requested_types = self._configuration_arg('type', ['all']) + if 'all' in requested_types: + requested_types = list(self._REPLAY_TYPES.keys()) + requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) + + if not video_id: + self._get_auth_token(url, slug) + headers = {'Authorization': f'Bearer {self._TOKEN}'} + 
game_id = self._download_json( + f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, + 'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id'] + replays = self._download_json( + 'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON', + query={'gameId': game_id}, headers=headers) + if len(requested_types) == 1: + video_id = traverse_obj(replays, ( + 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) + + if video_id: + return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + def entries(): + for replay in traverse_obj( + replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types) + ): + video_id = replay['mcpPlaybackId'] + yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + return self.playlist_result(entries(), slug) class NFLPlusEpisodeIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:episode' _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/episodes/(?P[\w-]+)' _TESTS = [{ - 'note': 'premium content', + 'note': 'Subscription required', 'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships', 'info_dict': { 'id': '1576832', 'ext': 'mp4', - 'title': 'Kurt\'s QB Insider: Conference Championships', + 'title': 'Conference Championships', 'description': 'md5:944f7fab56f7a37430bf8473f5473857', 'uploader': 'NFL', 'upload_date': '20230127', @@ -214,85 +362,9 @@ class NFLPlusEpisodeIE(NFLBaseIE): 'params': {'skip_download': 'm3u8'}, }] - _CLIENT_DATA = { - 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', - 'clientSecret': 'CZuvCL49d9OwfGsR', - 'deviceId': str(uuid.uuid4()), - 'deviceInfo': base64.b64encode(json.dumps({ - 'model': 'desktop', - 'version': 'Chrome', - 'osName': 'Windows', - 'osVersion': '10.0', - }, separators=(',', ':')).encode()).decode(), - 'networkType': 'other', - 'nflClaimGroupsToAdd': [], - 'nflClaimGroupsToRemove': [], - } - _ACCOUNT_INFO = {} 
- _API_KEY = None - - _TOKEN = None - _TOKEN_EXPIRY = 0 - - def _get_account_info(self, url, video_id): - cookies = self._get_cookies('https://www.nfl.com/') - login_token = traverse_obj(cookies, ( - (f'glt_{self._API_KEY}', f'gig_loginToken_{self._API_KEY}', - lambda k, _: k.startswith('glt_') or k.startswith('gig_loginToken_')), - {lambda x: x.value}), get_all=False) - if not login_token: - self.raise_login_required() - - account = self._download_json( - 'https://auth-id.nfl.com/accounts.getAccountInfo', video_id, - note='Downloading account info', data=urlencode_postdata({ - 'include': 'profile,data', - 'lang': 'en', - 'APIKey': self._API_KEY, - 'sdk': 'js_latest', - 'login_token': login_token, - 'authMode': 'cookie', - 'pageURL': url, - 'sdkBuild': traverse_obj(cookies, ( - 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='13642'), - 'format': 'json', - }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - self._ACCOUNT_INFO = traverse_obj(account, { - 'signatureTimestamp': 'signatureTimestamp', - 'uid': 'UID', - 'uidSignature': 'UIDSignature', - }) - - if len(self._ACCOUNT_INFO) != 3: - raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) - - def _get_auth_token(self, url, video_id): - if not self._ACCOUNT_INFO: - self._get_account_info(url, video_id) - - token = self._download_json( - 'https://api.nfl.com/identity/v3/token%s' % ( - '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), - video_id, headers={'Content-Type': 'application/json'}, note='Downloading access token', - data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) - - self._TOKEN = token['accessToken'] - self._TOKEN_EXPIRY = token['expiresIn'] - self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] - def _real_extract(self, url): slug = self._match_id(url) - - if not self._API_KEY: - webpage = self._download_webpage(url, slug, fatal=False) or '' - self._API_KEY = 
self._search_regex( - r'window\.gigyaApiKey=["\'](\w+)["\'];', webpage, 'API key', - default='3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f') - - if not self._TOKEN or self._TOKEN_EXPIRY <= int(time.time()): - self._get_auth_token(url, slug) - + self._get_auth_token(url, slug) video_id = self._download_json( f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ 'Authorization': f'Bearer {self._TOKEN}', From 61bdf15fc7400601c3da1aa7a43917310a5bf391 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:24:47 +0200 Subject: [PATCH 23/41] [core] Raise minimum recommended Python version to 3.8 (#8183) Authored by: Grub4K --- devscripts/changelog_override.json | 5 +++++ test/test_execution.py | 3 +++ yt_dlp/YoutubeDL.py | 16 ++++------------ yt_dlp/update.py | 25 +++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e7f453acf..9dfbf510f 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -88,5 +88,10 @@ "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", "short": "[rh:urllib] Simplify gzip decoding (#7611)", "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" } ] diff --git a/test/test_execution.py b/test/test_execution.py index 7a9e800b6..fb2f6e2e9 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -45,6 +45,9 @@ class TestExecution(unittest.TestCase): self.assertTrue(os.path.exists(LAZY_EXTRACTORS)) _, stderr = self.run_yt_dlp(opts=('-s', 'test:')) + # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions + if stderr and stderr.startswith('Deprecated Feature: Support for Python'): + stderr = '' self.assertFalse(stderr) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 39aaf2c2e..f322b12a2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -60,7 +60,7 @@ from .postprocessor import ( get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping -from .update import REPOSITORY, current_git_head, detect_variant +from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant from .utils import ( DEFAULT_OUTTMPL, IDENTITY, @@ -640,17 +640,9 @@ class YoutubeDL: for name, stream in self._out_files.items_ if name != 'console' }) - # The code is left like this to be reused for future deprecations - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7) - current_version = sys.version_info[:2] - if current_version < MIN_RECOMMENDED: - msg = ('Support for Python version %d.%d has been deprecated. ' - 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.' - '\n You will no longer receive updates on this version') - if current_version < MIN_SUPPORTED: - msg = 'Python version %d.%d is no longer supported' - self.deprecated_feature( - f'{msg}! 
Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) + system_deprecation = _get_system_deprecation() + if system_deprecation: + self.deprecated_feature(system_deprecation.replace('\n', '\n ')) if self.params.get('allow_unplayable_formats'): self.report_warning( diff --git a/yt_dlp/update.py b/yt_dlp/update.py index d708b09e3..db79df127 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -112,6 +112,31 @@ def is_non_updateable(): detect_variant(), _NON_UPDATEABLE_REASONS['unknown' if VARIANT else 'other']) +def _get_system_deprecation(): + MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 8) + + if sys.version_info > MIN_RECOMMENDED: + return None + + major, minor = sys.version_info[:2] + if sys.version_info < MIN_SUPPORTED: + msg = f'Python version {major}.{minor} is no longer supported' + else: + msg = f'Support for Python version {major}.{minor} has been deprecated. ' + # Temporary until `win_x86_exe` uses 3.8, which will deprecate Vista and Server 2008 + if detect_variant() == 'win_x86_exe': + platform_name = platform.platform() + if any(platform_name.startswith(f'Windows-{name}') for name in ('Vista', '2008Server')): + msg = 'Support for Windows Vista/Server 2008 has been deprecated. ' + else: + return None + msg += ('See https://github.com/yt-dlp/yt-dlp/issues/7803 for details.' + '\nYou may stop receiving updates on this version at any time') + + major, minor = MIN_RECOMMENDED + return f'{msg}! Please update to Python {major}.{minor} or above' + + def _sha256_file(path): h = hashlib.sha256() mv = memoryview(bytearray(128 * 1024)) From de015e930747165dbb8fcd360f8775fd973b7d6e Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:29:01 +0200 Subject: [PATCH 24/41] [core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581) The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands. 
Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference. Authored by: Grub4K --- devscripts/changelog_override.json | 5 +++++ test/test_YoutubeDL.py | 6 +++--- test/test_utils.py | 16 ++++++++++++++++ yt_dlp/compat/__init__.py | 2 +- yt_dlp/postprocessor/exec.py | 12 +++++------- yt_dlp/utils/_utils.py | 18 ++++++++++++++++-- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 9dfbf510f..fe0c82c66 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -93,5 +93,10 @@ "action": "add", "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" + }, + { + "action": "add", + "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", + "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." 
} ] diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 916ee48b9..0cf130db0 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -784,9 +784,9 @@ class TestYoutubeDL(unittest.TestCase): test('%(title4)#S', 'foo_bar_test') test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) if compat_os_name == 'nt': - test('%(title4)q', ('"foo \\"bar\\" test"', ""foo ⧹"bar⧹" test"")) - test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"')) - test('%(formats.0.id)#q', ('"id 1"', '"id 1"')) + test('%(title4)q', ('"foo ""bar"" test"', None)) + test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) + test('%(formats.0.id)#q', ('"id 1"', None)) else: test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\'')) test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") diff --git a/test/test_utils.py b/test/test_utils.py index 47d1f71bf..dc2d8ce12 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,6 +14,7 @@ import contextlib import io import itertools import json +import subprocess import xml.etree.ElementTree from yt_dlp.compat import ( @@ -28,6 +29,7 @@ from yt_dlp.utils import ( InAdvancePagedList, LazyList, OnDemandPagedList, + Popen, age_restricted, args_to_str, base_url, @@ -2388,6 +2390,20 @@ Line 1 assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') + @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + def test_Popen_windows_escaping(self): + def run_shell(args): + stdout, stderr, error = Popen.run( + args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert not stderr + assert not error + return stdout + + # Test escaping + assert run_shell(['echo', 'test"&']) == '"test""&"\n' + # Test if delayed expansion is disabled + assert run_shell(['echo', '^!']) == '"^!"\n' + assert run_shell('echo 
"^!"') == '"^!"\n' if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 832a9138d..5ad5c70ec 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -30,7 +30,7 @@ compat_os_name = os._name if os.name == 'java' else os.name if compat_os_name == 'nt': def compat_shlex_quote(s): import re - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') + return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""') else: from shlex import quote as compat_shlex_quote # noqa: F401 diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index cfc83167c..c2e73fbab 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -1,8 +1,6 @@ -import subprocess - from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import PostProcessingError, encodeArgument, variadic +from ..utils import Popen, PostProcessingError, variadic class ExecPP(PostProcessor): @@ -27,10 +25,10 @@ class ExecPP(PostProcessor): def run(self, info): for tmpl in self.exec_cmd: cmd = self.parse_cmd(tmpl, info) - self.to_screen('Executing command: %s' % cmd) - retCode = subprocess.call(encodeArgument(cmd), shell=True) - if retCode != 0: - raise PostProcessingError('Command returned error code %d' % retCode) + self.to_screen(f'Executing command: {cmd}') + _, _, return_code = Popen.run(cmd, shell=True) + if return_code != 0: + raise PostProcessingError(f'Command returned error code {return_code}') return [], info diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 213ccc636..ba6242380 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -825,7 +825,7 @@ class Popen(subprocess.Popen): _fix('LD_LIBRARY_PATH') # Linux _fix('DYLD_LIBRARY_PATH') # macOS - def __init__(self, *args, env=None, text=False, **kwargs): + def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs): if env is 
None: env = os.environ.copy() self._fix_pyinstaller_ld_path(env) @@ -835,7 +835,21 @@ class Popen(subprocess.Popen): kwargs['universal_newlines'] = True # For 3.6 compatibility kwargs.setdefault('encoding', 'utf-8') kwargs.setdefault('errors', 'replace') - super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo) + + if shell and compat_os_name == 'nt' and kwargs.get('executable') is None: + if not isinstance(args, str): + args = ' '.join(compat_shlex_quote(a) for a in args) + shell = False + args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"' + + super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo) + + def __comspec(self): + comspec = os.environ.get('ComSpec') or os.path.join( + os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe') + if os.path.isabs(comspec): + return comspec + raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set') def communicate_or_kill(self, *args, **kwargs): try: From 088add9567d39b758737e4299a0e619fd89d2e8f Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:35:23 +0200 Subject: [PATCH 25/41] [cleanup] Misc Authored by: Grub4K --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index dc2d8ce12..fd612ff86 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -2405,5 +2405,6 @@ Line 1 assert run_shell(['echo', '^!']) == '"^!"\n' assert run_shell('echo "^!"') == '"^!"\n' + if __name__ == '__main__': unittest.main() From c54ddfba0f7d68034339426223d75373c5fc86df Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 24 Sep 2023 00:38:42 +0000 Subject: [PATCH 26/41] Release 2023.09.24 Created by: Grub4K :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 +- .../ISSUE_TEMPLATE/2_site_support_request.yml | 8 +- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 8 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 +- 
.github/ISSUE_TEMPLATE/5_feature_request.yml | 8 +- .github/ISSUE_TEMPLATE/6_question.yml | 8 +- CONTRIBUTORS | 36 ++++ Changelog.md | 196 ++++++++++++++++++ supportedsites.md | 49 ++++- yt_dlp/version.py | 4 +- 10 files changed, 298 insertions(+), 35 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index dd1b33dde..f0fc71d57 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting that yt-dlp is broken on a **supported** site required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -64,7 +64,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -72,8 +72,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to 
date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 4f4378924..ac9a72a1c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -76,7 +76,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -84,8 +84,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 05b4dd23b..577e4d491 100644 --- 
a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -72,7 +72,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -80,8 +80,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 880f1014c..9529c1bd6 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm 
running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,8 +65,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index acb11795f..b17a6e046 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version 
**2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true @@ -53,7 +53,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -61,7 +61,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index a2563e975..5345e8917 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true 
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates required: true @@ -59,7 +59,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -67,7 +67,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6b9b9f470..72b9584ec 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -467,3 +467,39 @@ rdamas RfadnjdExt urectanc nao20010128nao/Lesmiscore +04-pasha-04 +aaruni96 +aky-01 +AmirAflak +ApoorvShah111 +at-wat +davinkevin +demon071 +denhotte +FinnRG +fireattack +Frankgoji +GD-Slime +hatsomatt +ifan-t +kshitiz305 +kylegustavo +mabdelfattah +nathantouze +niemands +Rajeshwaran2001 +RedDeffender +Rohxn16 +sb0stn +SevenLives +simon300000 +snixon +soundchaser128 +szabyg +trainman261 +trislee +wader +Yalab7 +zhallgato +zhong-yiyu +Zprokkel diff --git a/Changelog.md b/Changelog.md index 32cdaca2a..04511927f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,202 @@ # To create a release, dispatch the 
https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2023.09.24 + +#### Important changes +- **The minimum *recommended* Python version has been raised to 3.8** +Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803) +- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg) + - The shell escape function is now using `""` instead of `\"`. + - `utils.Popen` has been patched to properly quote commands. + +#### Core changes +- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan) +- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K) +- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz) +- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K) +- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K) +- [`FFmpegFixupM3u8PP` may need to run with 
ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan) +- **compat** + - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd)) + - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan) +- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly) +- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly) +- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan) +- **utils** + - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan) + - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly) + - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by 
[pukkandan](https://github.com/pukkandan) + - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah) + - `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move) + +#### Extractor changes +- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan) +- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move) +- **abematv** + - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives) + - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan) +- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg) +- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos) +- **artetv**: [Fix HLS formats 
extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly) +- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick) +- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305) +- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317) +- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick) +- **bilibili** + - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt) + - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22) + - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime) +- **bpb**: [Overhaul 
extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K) +- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0) +- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld) +- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261) +- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261) +- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317) +- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K) +- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt) +- 
**dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly)) +- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo) +- **facebook** + - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1) + - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack) + - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071) +- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by 
[ischmidt20](https://github.com/ischmidt20) +- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04) +- **generic** + - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly) + - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan) +- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001) + - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly) + - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly) +- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7) +- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01) +- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) 
([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move) +- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly) +- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000) +- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly) +- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn) +- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16) +- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien) +- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato) +- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly) +- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) 
([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317) +- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317) +- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak) +- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly) +- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly) +- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png) +- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly) +- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111) +- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) 
([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly)) +- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move) +- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly) +- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon) +- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG) +- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji) +- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands) +- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by 
[zhong-yiyu](https://github.com/zhong-yiyu) +- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K) +- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0) +- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000) +- **reddit** + - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly) + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly) +- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly) +- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128) +- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee) +- **s4c** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t) + - [Add series support and extract 
subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t) +- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt) +- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly) +- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera) +- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader) +- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317) +- **tiktok** + - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly) +- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly) +- 
**tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli) +- **twitcasting** + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt) + - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat) +- **twitter** + - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly) + - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly) + - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly) + - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan) + - spaces + - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly) + - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly) +- **unsupported**: [List more sites with 
DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan) +- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt) +- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel) +- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin) +- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg) +- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly) +- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt) +- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev) +- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa) +- **wrestleuniverseppv**: [Fix HLS AES key 
extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz) + - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz) + - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz) +- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly) +- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **external** + - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly) + - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by 
[bashonly](https://github.com/bashonly) + +#### Postprocessor changes +- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic) + +#### Networking changes +- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan) +- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz) +- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz) +- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly) +- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly) +- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz) +- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz) +- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) 
([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler** + - urllib + - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz) + - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a)) + +#### Misc. changes +- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - Miscellaneous + - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan) + - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT) + - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K) +- **devscripts**: `make_changelog`: [Fix changelog grouping 
and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K) +- **test** + - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan) + - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat) + ### 2023.07.06 #### Important changes diff --git a/supportedsites.md b/supportedsites.md index 379d28ef3..620e0f305 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -77,7 +77,7 @@ - **AnimalPlanet** - **ant1newsgr:article**: ant1news.gr articles - **ant1newsgr:embed**: ant1news.gr embedded videos - - **ant1newsgr:watch**: ant1news.gr videos + - **antenna:watch**: antenna.gr and ant1news.gr videos - **Anvato** - **aol.com**: Yahoo screen and movies - **APA** @@ -98,8 +98,6 @@ - **ArteTVCategory** - **ArteTVEmbed** - **ArteTVPlaylist** - - **AsianCrush** - - **AsianCrushPlaylist** - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - **ATTTechChannel** @@ -118,6 +116,7 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **axs.tv** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 
百度视频 - **BanBye** @@ -162,11 +161,16 @@ - **BilibiliAudioAlbum** - **BiliBiliBangumi** - **BiliBiliBangumiMedia** + - **BiliBiliBangumiSeason** + - **BilibiliCollectionList** + - **BilibiliFavoritesList** - **BiliBiliPlayer** + - **BilibiliPlaylist** - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix + - **BilibiliSeriesList** - **BilibiliSpaceAudio** - - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** + - **BilibiliWatchlater** - **BiliIntl**: [*biliintl*](## "netrc machine") - **biliIntl:series**: [*biliintl*](## "netrc machine") - **BiliLive** @@ -201,6 +205,8 @@ - **BreitBart** - **brightcove:legacy** - **brightcove:new** + - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org + - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen @@ -220,14 +226,17 @@ - **Camsoda** - **CamtasiaEmbed** - **CamWithHer** + - **Canal1** - **CanalAlpha** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr + - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** + - **cbc.ca:​player:playlist** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -257,6 +266,8 @@ - **Cinchcast** - **Cinemax** - **CinetecaMilano** + - **Cineverse** + - **CineverseDetails** - **CiscoLiveSearch** - **CiscoLiveSession** - **ciscowebex**: Cisco Webex @@ -365,7 +376,7 @@ - **Dotsub** - **Douyin** - **DouyuShow** - - **DouyuTV**: 斗鱼 + - **DouyuTV**: 斗鱼直播 - **DPlay** - **DRBonanza** - **Drooble** @@ -408,6 +419,7 @@ - **Engadget** - **Epicon** - **EpiconSeries** + - **eplus:inbound**: e+ (イープラス) overseas - **Epoch** - **Eporner** - **EroProfile**: [*eroprofile*](## "netrc machine") @@ -732,6 +744,7 @@ - **lynda**: [*lynda*](## "netrc machine") lynda.com 
videos - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - **m6** + - **MagellanTV** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru @@ -812,6 +825,7 @@ - **Mofosex** - **MofosexEmbed** - **Mojvideo** + - **Monstercat** - **MonsterSirenHypergryphMusic** - **Morningstar**: morningstar.com - **Motherless** @@ -840,6 +854,7 @@ - **MujRozhlas** - **Murrtube** - **MurrtubeUser**: Murrtube user profile + - **MuseAI** - **MuseScore** - **MusicdexAlbum** - **MusicdexArtist** @@ -944,6 +959,9 @@ - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs + - **NiconicoChannelPlus**: ニコニコチャンネルプラス + - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives + - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix @@ -1046,6 +1064,7 @@ - **Patreon** - **PatreonCampaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED 
Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), 
WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **PBSKids** - **PearVideo** - **PeekVids** - **peer.tv** @@ -1062,6 +1081,7 @@ - **phoenix.de** - **Photobucket** - **Piapro**: [*piapro*](## "netrc machine") + - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM - **Picarto** - **PicartoVod** - **Piksel** @@ -1105,6 +1125,7 @@ - **polskieradio:​podcast:list** - **Popcorntimes** - **PopcornTV** + - **Pornbox** - **PornCom** - **PornerBros** - **Pornez** @@ -1121,7 +1142,6 @@ - **PornTop** - **PornTube** - **Pr0gramm** - - **Pr0grammStatic** - **PrankCast** - **PremiershipRugby** - **PressTV** @@ -1156,6 +1176,10 @@ - **radiocanada** - **radiocanada:audiovideo** - **radiofrance** + - **RadioFranceLive** + - **RadioFrancePodcast** + - **RadioFranceProfile** + - **RadioFranceProgramSchedule** - **RadioJavan** - **radiokapital** - **radiokapital:show** @@ -1177,6 +1201,7 @@ - **RayWenderlichCourse** - **RbgTum** - **RbgTumCourse** + - **RbgTumNewCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** @@ -1259,6 +1284,8 @@ - **Ruutu** - **Ruv** - **ruv.is:spila** + - **S4C** + - **S4CSeries** - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video - **safari:api**: [*safari*](## "netrc machine") - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses @@ -1325,6 +1352,7 @@ - **Smotrim** - **Snotr** - **Sohu** + - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - **soundcloud**: [*soundcloud*](## "netrc machine") @@ -1378,7 +1406,6 @@ - **StoryFireSeries** - **StoryFireUser** - **Streamable** - - **Streamanity** - **streamcloud.eu** - **StreamCZ** - **StreamFF** @@ -1403,6 +1430,9 @@ - **Tagesschau** - **Tass** - **TBS** 
+ - **TBSJPEpisode** + - **TBSJPPlaylist** + - **TBSJPProgram** - **TDSLifeway** - **Teachable**: [*teachable*](## "netrc machine") - **TeachableCourse**: [*teachable*](## "netrc machine") @@ -1702,7 +1732,6 @@ - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** - - **web.archive:vlive**: web.archive.org saved vlive videos - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** @@ -1710,7 +1739,8 @@ - **WebOfStories** - **WebOfStoriesPlaylist** - **Weibo** - - **WeiboMobile** + - **WeiboUser** + - **WeiboVideo** - **WeiqiTV**: WQTV - **wetv:episode** - **WeTvSeries** @@ -1726,6 +1756,7 @@ - **Whyp** - **wikimedia.org** - **Willow** + - **Wimbledon** - **WimTV** - **WinSportsVideo** - **Wistia** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 67cfe44ef..2a7c84b93 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.07.06' +__version__ = '2023.09.24' -RELEASE_GIT_HEAD = 'b532a3481046e1eabb6232ee8196fb696c356ff6' +RELEASE_GIT_HEAD = '088add9567d39b758737e4299a0e619fd89d2e8f' VARIANT = None From eb5bdbfa70126c7d5355cc0954b63720522e462c Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 3 Oct 2023 19:42:30 +1300 Subject: [PATCH 27/41] [ie/youtube] Raise a warning for `Incomplete Data` instead of an error (#8238) Closes https://github.com/yt-dlp/yt-dlp/issues/8206 Adds `raise_incomplete_data` extractor arg to revert this behaviour and raise an error. Authored by: coletdjnz Co-authored-by: Simon Sawicki --- README.md | 1 + yt_dlp/extractor/youtube.py | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7bf446572..a0b69c9a1 100644 --- a/README.md +++ b/README.md @@ -1809,6 +1809,7 @@ The following extractors use this feature: * `formats`: Change the types of formats to return. 
`dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests +* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a39d17cf1..7e13aa779 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -941,7 +941,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - for retry in self.RetryManager(): + raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) + # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal. 
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete)) + icd_rm = next(icd_retries) + main_retries = iter(self.RetryManager()) + main_rm = next(main_retries) + for _ in range(main_rm.retries + icd_rm.retries + 1): try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -953,7 +959,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if not isinstance(e.cause, network_exceptions): return self._error_or_warning(e, fatal=fatal) elif not isinstance(e.cause, HTTPError): - retry.error = e + main_rm.error = e + next(main_retries) continue first_bytes = e.cause.response.read(512) @@ -965,27 +972,32 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 if e.cause.status not in (403, 429): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) try: self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response + # YouTube's servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 if 'unknown error' in e.msg.lower(): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if not traverse_obj(response, *variadic(check_get_keys)): - retry.error = ExtractorError('Incomplete data received', 
expected=True) + icd_rm.error = ExtractorError('Incomplete data received', expected=True) + should_retry = next(icd_retries, None) + if not should_retry: + return None continue return response From cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 3 Oct 2023 11:33:40 +0200 Subject: [PATCH 28/41] [ie/xhamster:user] Support creator urls (#8232) Authored by: Grub4K --- yt_dlp/extractor/xhamster.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 37224799b..aec1f20bb 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -407,7 +407,7 @@ class XHamsterEmbedIE(InfoExtractor): class XHamsterUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P[^/?#&]+)' % XHamsterIE._DOMAINS + _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?Pusers)|creators)/(?P[^/?#&]+)' _TESTS = [{ # Paginated user profile 'url': 'https://xhamster.com/users/netvideogirls/videos', @@ -422,6 +422,12 @@ class XHamsterUserIE(InfoExtractor): 'id': 'firatkaan', }, 'playlist_mincount': 1, + }, { + 'url': 'https://xhamster.com/creators/squirt-orgasm-69', + 'info_dict': { + 'id': 'squirt-orgasm-69', + }, + 'playlist_mincount': 150, }, { 'url': 'https://xhday.com/users/mobhunter', 'only_matching': True, @@ -430,8 +436,9 @@ class XHamsterUserIE(InfoExtractor): 'only_matching': True, }] - def _entries(self, user_id): - next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id + def _entries(self, user_id, is_user): + prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive') + next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1' for pagenum in itertools.count(1): page = self._download_webpage( next_page_url, user_id, 'Downloading page %s' % pagenum) @@ -454,5 +461,5 @@ class XHamsterUserIE(InfoExtractor): break def _real_extract(self, url): - user_id = 
self._match_id(url) - return self.playlist_result(self._entries(user_id), user_id) + user, user_id = self._match_valid_url(url).group('user', 'id') + return self.playlist_result(self._entries(user_id, bool(user)), user_id) From 0730d5a966fa8a937d84bfb7f68be5198acb039b Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 4 Oct 2023 12:44:13 -0500 Subject: [PATCH 29/41] [ie/gofile] Fix token cookie bug Authored by: bashonly --- yt_dlp/extractor/gofile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 898390583..ef14b57d0 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -60,7 +60,7 @@ class GofileIE(InfoExtractor): account_data = self._download_json( 'https://api.gofile.io/createAccount', None, note='Getting a new guest account') self._TOKEN = account_data['data']['token'] - self._set_cookie('gofile.io', 'accountToken', self._TOKEN) + self._set_cookie('.gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): query_params = { From b095fd3fa9d58a65dc9b830bd63b9d909422aa86 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:01:52 -0500 Subject: [PATCH 30/41] [ie/WrestleUniverseVOD] Call API with device ID (#8272) Closes #8271 Authored by: bashonly --- yt_dlp/extractor/wrestleuniverse.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py index dd12804db..145246a14 100644 --- a/yt_dlp/extractor/wrestleuniverse.py +++ b/yt_dlp/extractor/wrestleuniverse.py @@ -190,10 +190,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE): def _real_extract(self, url): lang, video_id = self._match_valid_url(url).group('lang', 'id') metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData') - video_data = self._call_api(video_id, ':watch', 'watch', data={ - # 'deviceId' is required if 
ignoreDeviceRestriction is False - 'ignoreDeviceRestriction': True, - }) + video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID}) return { 'id': video_id, From 91a670a4f7babe9c8aa2018f57d8c8952a6f49d8 Mon Sep 17 00:00:00 2001 From: gillux Date: Sat, 7 Oct 2023 06:27:54 +0800 Subject: [PATCH 31/41] [ie/LiTV] Fix extractor (#7785) Closes #5456 Authored by: jiru --- yt_dlp/extractor/litv.py | 48 ++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 19b298ec6..2c7c7175e 100644 --- a/yt_dlp/extractor/litv.py +++ b/yt_dlp/extractor/litv.py @@ -13,7 +13,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P[^&]+)' - _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' + _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' _TESTS = [{ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', @@ -21,16 +21,18 @@ class LiTVIE(InfoExtractor): 'id': 'VOD00041606', 'title': '花千骨', }, - 'playlist_count': 50, + 'playlist_count': 51, # 50 episodes + 1 trailer }, { 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', - 'md5': '969e343d9244778cb29acec608e53640', + 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'info_dict': { 'id': 'VOD00041610', 'ext': 'mp4', 'title': '花千骨第1集', 'thumbnail': r're:https?://.*\.jpg$', - 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f', + 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', + 'categories': ['奇幻', '愛情', '中國', '仙俠'], + 'episode': 'Episode 1', 'episode_number': 1, }, 'params': { @@ -46,20 +48,17 @@ class LiTVIE(InfoExtractor): 'title': '芈月傳第1集 霸星芈月降世楚國', 'description': 
'楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。', }, - 'skip': 'Georestricted to Taiwan', + 'skip': 'No longer exists', }] - def _extract_playlist(self, season_list, video_id, program_info, prompt=True): - episode_title = program_info['title'] - content_id = season_list['contentId'] - + def _extract_playlist(self, playlist_data, content_type): all_episodes = [ self.url_result(smuggle_url( - self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']), + self._URL_TEMPLATE % (content_type, episode['contentId']), {'force_noplaylist': True})) # To prevent infinite recursion - for episode in season_list['episode']] + for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] - return self.playlist_result(all_episodes, content_id, episode_title) + return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -68,24 +67,31 @@ class LiTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if self._search_regex( + r'(?i)]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', + webpage, 'meta refresh redirect', default=False, group=0): + raise ExtractorError('No such content found', expected=True) + program_info = self._parse_json(self._search_regex( r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), video_id) - season_list = list(program_info.get('seasonList', {}).values()) - playlist_id = traverse_obj(season_list, 0, 'contentId') - if self._yes_playlist(playlist_id, video_id, smuggled_data): - return self._extract_playlist(season_list[0], video_id, program_info) - - # In browsers `getMainUrl` request is always issued. Usually this + # In browsers `getProgramInfo` request is always issued. 
Usually this # endpoint gives the same result as the data embedded in the webpage. - # If georestricted, there are no embedded data, so an extra request is - # necessary to get the error code + # If, for some reason, there are no embedded data, we do an extra request. if 'assetId' not in program_info: program_info = self._download_json( 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, query={'contentId': video_id}, headers={'Accept': 'application/json'}) + + series_id = program_info['seriesId'] + if self._yes_playlist(series_id, video_id, smuggled_data): + playlist_data = self._download_json( + 'https://www.litv.tv/vod/ajax/getSeriesTree', video_id, + query={'seriesId': series_id}, headers={'Accept': 'application/json'}) + return self._extract_playlist(playlist_data, program_info['contentType']) + video_data = self._parse_json(self._search_regex( r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', webpage, 'video data', default='{}'), video_id) @@ -96,7 +102,7 @@ class LiTVIE(InfoExtractor): 'contentType': program_info['contentType'], } video_data = self._download_json( - 'https://www.litv.tv/vod/getMainUrl', video_id, + 'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id, data=json.dumps(payload).encode('utf-8'), headers={'Content-Type': 'application/json'}) From f980df734cf5c0eaded2f7b38c6c60bccfeebb48 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Fri, 6 Oct 2023 18:31:33 -0400 Subject: [PATCH 32/41] [ie/neteasemusic] Fix extractors (#8181) Closes #4388 Authored by: c-basalt --- yt_dlp/extractor/neteasemusic.py | 575 +++++++++++++++++-------------- 1 file changed, 312 insertions(+), 263 deletions(-) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 5b7307bc8..68bfcb6ba 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -2,105 +2,74 @@ import itertools import json import re import time -from base64 import b64encode -from binascii import 
hexlify -from datetime import datetime from hashlib import md5 from random import randint from .common import InfoExtractor from ..aes import aes_ecb_encrypt, pkcs7_padding -from ..compat import compat_urllib_parse_urlencode -from ..networking import Request from ..utils import ( ExtractorError, - bytes_to_intlist, - error_to_compat_str, - float_or_none, int_or_none, - intlist_to_bytes, - try_get, + join_nonempty, + str_or_none, + strftime_or_none, + traverse_obj, + unified_strdate, + url_or_none, + urljoin, + variadic, ) class NetEaseMusicBaseIE(InfoExtractor): _FORMATS = ['bMusic', 'mMusic', 'hMusic'] - _NETEASE_SALT = '3go8&$8*3*3h0k(2)2' _API_BASE = 'http://music.163.com/api/' + _GEO_BYPASS = False - @classmethod - def _encrypt(cls, dfsid): - salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) - string_bytes = bytearray(str(dfsid).encode('ascii')) - salt_len = len(salt_bytes) - for i in range(len(string_bytes)): - string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] - m = md5() - m.update(bytes(string_bytes)) - result = b64encode(m.digest()).decode('ascii') - return result.replace('/', '_').replace('+', '-') + @staticmethod + def kilo_or_none(value): + return int_or_none(value, scale=1000) - def make_player_api_request_data_and_headers(self, song_id, bitrate): - KEY = b'e82ckenh8dichen8' - URL = '/api/song/enhance/player/url' - now = int(time.time() * 1000) - rand = randint(0, 1000) - cookie = { - 'osver': None, - 'deviceId': None, + def _create_eapi_cipher(self, api_path, query_body, cookies): + request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) + + message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1') + msg_digest = md5(message).hexdigest() + + data = pkcs7_padding(list(str.encode( + f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}'))) + encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8'))) + return f'params={encrypted.hex().upper()}'.encode() + + def 
_download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs): + cookies = { + 'osver': 'undefined', + 'deviceId': 'undefined', 'appver': '8.0.0', 'versioncode': '140', - 'mobilename': None, + 'mobilename': 'undefined', 'buildver': '1623435496', 'resolution': '1920x1080', '__csrf': '', 'os': 'pc', - 'channel': None, - 'requestId': '{0}_{1:04}'.format(now, rand), + 'channel': 'undefined', + 'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}', + **traverse_obj(self._get_cookies(self._API_BASE), { + 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), + }) } - request_text = json.dumps( - {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, - separators=(',', ':')) - message = 'nobody{0}use{1}md5forencrypt'.format( - URL, request_text).encode('latin1') - msg_digest = md5(message).hexdigest() - - data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( - URL, request_text, msg_digest) - data = pkcs7_padding(bytes_to_intlist(data)) - encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) - encrypted_params = hexlify(encrypted).decode('ascii').upper() - - cookie = '; '.join( - ['{0}={1}'.format(k, v if v is not None else 'undefined') - for [k, v] in cookie.items()]) - - headers = { - 'User-Agent': self.extractor.get_param('http_headers')['User-Agent'], - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': 'https://music.163.com', - 'Cookie': cookie, - } - return ('params={0}'.format(encrypted_params), headers) + return self._download_json( + urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, + data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ + 'Referer': 'https://music.163.com', + 'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]), + **headers, + }, **kwargs) def _call_player_api(self, song_id, bitrate): - url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' - data, headers = self.make_player_api_request_data_and_headers(song_id, 
bitrate) - try: - msg = 'empty result' - result = self._download_json( - url, song_id, data=data.encode('ascii'), headers=headers) - if result: - return result - except ExtractorError as e: - if type(e.cause) in (ValueError, TypeError): - # JSON load failure - raise - except Exception as e: - msg = error_to_compat_str(e) - self.report_warning('%s API call (%s) failed: %s' % ( - song_id, bitrate, msg)) - return {} + return self._download_eapi_json( + '/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, + note=f'Downloading song URL info: bitrate {bitrate}') def extract_formats(self, info): err = 0 @@ -110,45 +79,50 @@ class NetEaseMusicBaseIE(InfoExtractor): details = info.get(song_format) if not details: continue - bitrate = int_or_none(details.get('bitrate')) or 999000 - data = self._call_player_api(song_id, bitrate) - for song in try_get(data, lambda x: x['data'], list) or []: - song_url = try_get(song, lambda x: x['url']) - if not song_url: - continue + for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))): + song_url = song['url'] if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, - 'ext': details.get('extension'), - 'abr': float_or_none(song.get('br'), scale=1000), 'format_id': song_format, - 'filesize': int_or_none(song.get('size')), - 'asr': int_or_none(details.get('sr')), + 'asr': traverse_obj(details, ('sr', {int_or_none})), + **traverse_obj(song, { + 'ext': ('type', {str}), + 'abr': ('br', {self.kilo_or_none}), + 'filesize': ('size', {int_or_none}), + }), }) elif err == 0: - err = try_get(song, lambda x: x['code'], int) + err = traverse_obj(song, ('code', {int})) or 0 if not formats: - msg = 'No media links found' if err != 0 and (err < 200 or err >= 400): - raise ExtractorError( - '%s (site code %d)' % (msg, err, ), expected=True) + raise ExtractorError(f'No media links found (site code {err})', expected=True) else: 
self.raise_geo_restricted( - msg + ': probably this video is not available from your location due to geo restriction.', - countries=['CN']) - + 'No media links found: probably due to geo restriction.', countries=['CN']) return formats - @classmethod - def convert_milliseconds(cls, ms): - return int(round(ms / 1000.0)) - def query_api(self, endpoint, video_id, note): - req = Request('%s%s' % (self._API_BASE, endpoint)) - req.headers['Referer'] = self._API_BASE - return self._download_json(req, video_id, note) + result = self._download_json( + f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) + code = traverse_obj(result, ('code', {int})) + message = traverse_obj(result, ('message', {str})) or '' + if code == -462: + self.raise_login_required(f'Login required to download: {message}') + elif code != 200: + raise ExtractorError(f'Failed to get meta info: {code} {message}') + return result + + def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'): + for song in traverse_obj(songs_data, ( + *variadic(entry_keys, (str, bytes, dict, set)), + lambda _, v: int_or_none(v[id_key]) is not None)): + song_id = str(song[id_key]) + yield self.url_result( + f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE, + song_id, traverse_obj(song, (name_key, {str}))) class NetEaseMusicIE(NetEaseMusicBaseIE): @@ -156,16 +130,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): IE_DESC = '网易云音乐' _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P[0-9]+)' _TESTS = [{ - 'url': 'http://music.163.com/#/song?id=32102397', - 'md5': '3e909614ce09b1ccef4a3eb205441190', + 'url': 'https://music.163.com/#/song?id=548648087', 'info_dict': { - 'id': '32102397', + 'id': '548648087', 'ext': 'mp3', - 'title': 'Bad Blood', - 'creator': 'Taylor Swift / Kendrick Lamar', - 'upload_date': '20150516', - 'timestamp': 1431792000, - 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', + 'title': '戒烟 (Live)', + 'creator': '李荣浩 / 
朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊', + 'timestamp': 1522944000, + 'upload_date': '20180405', + 'description': 'md5:3650af9ee22c87e8637cb2dde22a765c', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + "duration": 256, + 'thumbnail': r're:^http.*\.jpg', }, }, { 'note': 'No lyrics.', @@ -176,21 +152,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'title': 'Opus 28', 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', - 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', 'timestamp': 1202745600, - }, - }, { - 'note': 'Has translated name.', - 'url': 'http://music.163.com/#/song?id=22735043', - 'info_dict': { - 'id': '22735043', - 'ext': 'mp3', - 'title': '소원을 말해봐 (Genie)', - 'creator': '少女时代', - 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184', - 'upload_date': '20100127', - 'timestamp': 1264608000, - 'alt_title': '说出愿望吧(Genie)', + 'duration': 263, + 'thumbnail': r're:^http.*\.jpg', }, }, { 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', @@ -203,59 +167,99 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'upload_date': '19911130', 'timestamp': 691516800, 'description': 'md5:1ba2f911a2b0aa398479f595224f2141', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 268, + 'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团', + 'thumbnail': r're:^http.*\.jpg', }, + }, { + 'url': 'http://music.163.com/#/song?id=32102397', + 'md5': '3e909614ce09b1ccef4a3eb205441190', + 'info_dict': { + 'id': '32102397', + 'ext': 'mp3', + 'title': 'Bad Blood', + 'creator': 'Taylor Swift / Kendrick Lamar', + 'upload_date': '20150516', + 'timestamp': 1431792000, + 'description': 'md5:21535156efb73d6d1c355f95616e285a', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 199, + 'thumbnail': r're:^http.*\.jpg', + }, + 'skip': 'Blocked outside Mainland China', + }, { + 'note': 'Has translated name.', + 'url': 'http://music.163.com/#/song?id=22735043', + 'info_dict': { + 'id': '22735043', + 'ext': 'mp3', + 'title': '소원을 말해봐 
(Genie)', + 'creator': '少女时代', + 'upload_date': '20100127', + 'timestamp': 1264608000, + 'description': 'md5:03d1ffebec3139aa4bafe302369269c5', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 229, + 'alt_title': '说出愿望吧(Genie)', + 'thumbnail': r're:^http.*\.jpg', + }, + 'skip': 'Blocked outside Mainland China', }] def _process_lyrics(self, lyrics_info): - original = lyrics_info.get('lrc', {}).get('lyric') - translated = lyrics_info.get('tlyric', {}).get('lyric') + original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str})) + translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str})) + + if not original or original == '[99:00.00]纯音乐,请欣赏\n': + return None if not translated: - return original + return { + 'lyrics': [{'data': original, 'ext': 'lrc'}], + } lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)' original_ts_texts = re.findall(lyrics_expr, original) - translation_ts_dict = dict( - (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated) - ) - lyrics = '\n'.join([ - '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, '')) - for time_stamp, text in original_ts_texts - ]) - return lyrics + translation_ts_dict = dict(re.findall(lyrics_expr, translated)) + + merged = '\n'.join( + join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ') + for timestamp, text in original_ts_texts) + + return { + 'lyrics_merged': [{'data': merged, 'ext': 'lrc'}], + 'lyrics': [{'data': original, 'ext': 'lrc'}], + 'lyrics_translated': [{'data': translated, 'ext': 'lrc'}], + } def _real_extract(self, url): song_id = self._match_id(url) - params = { - 'id': song_id, - 'ids': '[%s]' % song_id - } info = self.query_api( - 'song/detail?' 
+ compat_urllib_parse_urlencode(params), - song_id, 'Downloading song info')['songs'][0] + f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] formats = self.extract_formats(info) - lyrics_info = self.query_api( - 'song/lyric?id=%s&lv=-1&tv=-1' % song_id, - song_id, 'Downloading lyrics data') - lyrics = self._process_lyrics(lyrics_info) - - alt_title = None - if info.get('transNames'): - alt_title = '/'.join(info.get('transNames')) + lyrics = self._process_lyrics(self.query_api( + f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) + lyric_data = { + 'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False), + 'subtitles': lyrics, + } if lyrics else {} return { 'id': song_id, - 'title': info['name'], - 'alt_title': alt_title, - 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]), - 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')), - 'thumbnail': info.get('album', {}).get('picUrl'), - 'duration': self.convert_milliseconds(info.get('duration', 0)), - 'description': lyrics, 'formats': formats, + 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None, + 'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None, + **lyric_data, + **traverse_obj(info, { + 'title': ('name', {str}), + 'timestamp': ('album', 'publishTime', {self.kilo_or_none}), + 'thumbnail': ('album', 'picUrl', {url_or_none}), + 'duration': ('duration', {self.kilo_or_none}), + }), } @@ -263,31 +267,44 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): IE_NAME = 'netease:album' IE_DESC = '网易云音乐 - 专辑' _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P[0-9]+)' - _TEST = { + _TESTS = [{ + 'url': 'https://music.163.com/#/album?id=133153666', + 'info_dict': { + 'id': '133153666', + 'title': '桃几的翻唱', + 'upload_date': '20210913', + 'description': '桃几2021年翻唱合集', + 'thumbnail': r're:^http.*\.jpg', + }, + 
'playlist_mincount': 13, + }, { 'url': 'http://music.163.com/#/album?id=220780', 'info_dict': { 'id': '220780', - 'title': 'B\'day', + 'title': 'B\'Day', + 'upload_date': '20060904', + 'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0', + 'thumbnail': r're:^http.*\.jpg', }, 'playlist_count': 23, - 'skip': 'Blocked outside Mainland China', - } + }] def _real_extract(self, url): album_id = self._match_id(url) + webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id) - info = self.query_api( - 'album/%s?id=%s' % (album_id, album_id), - album_id, 'Downloading album data')['album'] - - name = info['name'] - desc = info.get('description') - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['songs'] - ] - return self.playlist_result(entries, album_id, name, desc) + songs = self._search_json( + r']+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id, + end_pattern=r'', contains_pattern=r'\[(?s:.+)\]') + metainfo = { + 'title': self._og_search_property('title', webpage, 'title', fatal=False), + 'description': self._html_search_regex( + (rf']+\bid="album-desc-{suffix}"[^>]*>(.*?)' for suffix in ('more', 'dot')), + webpage, 'description', flags=re.S, fatal=False), + 'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False), + 'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)), + } + return self.playlist_result(self._get_entries(songs), album_id, **metainfo) class NetEaseMusicSingerIE(NetEaseMusicBaseIE): @@ -299,10 +316,9 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): 'url': 'http://music.163.com/#/artist?id=10559', 'info_dict': { 'id': '10559', - 'title': '张惠妹 - aMEI;阿密特', + 'title': '张惠妹 - aMEI;阿妹;阿密特', }, 'playlist_count': 50, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'Singer has translated name.', 'url': 'http://music.163.com/#/artist?id=124098', @@ 
-311,28 +327,28 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): 'title': '李昇基 - 이승기', }, 'playlist_count': 50, - 'skip': 'Blocked outside Mainland China', + }, { + 'note': 'Singer with both translated and alias', + 'url': 'https://music.163.com/#/artist?id=159692', + 'info_dict': { + 'id': '159692', + 'title': '初音ミク - 初音未来;Hatsune Miku', + }, + 'playlist_count': 50, }] def _real_extract(self, url): singer_id = self._match_id(url) info = self.query_api( - 'artist/%s?id=%s' % (singer_id, singer_id), - singer_id, 'Downloading singer data') + f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') - name = info['artist']['name'] - if info['artist']['trans']: - name = '%s - %s' % (name, info['artist']['trans']) - if info['artist']['alias']: - name = '%s - %s' % (name, ';'.join(info['artist']['alias'])) + name = join_nonempty( + traverse_obj(info, ('artist', 'name', {str})), + join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'), + delim=' - ') - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['hotSongs'] - ] - return self.playlist_result(entries, singer_id, name) + return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name) class NetEaseMusicListIE(NetEaseMusicBaseIE): @@ -344,10 +360,28 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): 'info_dict': { 'id': '79177352', 'title': 'Billboard 2007 Top 100', - 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022' + 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022', + 'tags': ['欧美'], + 'uploader': '浑然破灭', + 'uploader_id': '67549805', + 'timestamp': int, + 'upload_date': r're:\d{8}', }, - 'playlist_count': 99, - 'skip': 'Blocked outside Mainland China', + 'playlist_mincount': 95, + }, { + 'note': 'Toplist/Charts sample', + 'url': 'https://music.163.com/#/discover/toplist?id=60198', + 'info_dict': { + 'id': '60198', + 'title': 're:美国Billboard榜 
[0-9]{4}-[0-9]{2}-[0-9]{2}', + 'description': '美国Billboard排行榜', + 'tags': ['流行', '欧美', '榜单'], + 'uploader': 'Billboard公告牌', + 'uploader_id': '48171', + 'timestamp': int, + 'upload_date': r're:\d{8}', + }, + 'playlist_count': 100, }, { 'note': 'Toplist/Charts sample', 'url': 'http://music.163.com/#/discover/toplist?id=3733003', @@ -363,64 +397,86 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - info = self.query_api( - 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id, - list_id, 'Downloading playlist data')['result'] + info = self._download_eapi_json( + '/v3/playlist/detail', list_id, + {'id': list_id, 't': '-1', 'n': '500', 's': '0'}, + note="Downloading playlist info") - name = info['name'] - desc = info.get('description') + metainfo = traverse_obj(info, ('playlist', { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'tags': ('tags', ..., {str}), + 'uploader': ('creator', 'nickname', {str}), + 'uploader_id': ('creator', 'userId', {str_or_none}), + 'timestamp': ('updateTime', {self.kilo_or_none}), + })) + if traverse_obj(info, ('playlist', 'specialType')) == 10: + metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' - if info.get('specialType') == 10: # is a chart/toplist - datestamp = datetime.fromtimestamp( - self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d') - name = '%s %s' % (name, datestamp) - - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['tracks'] - ] - return self.playlist_result(entries, list_id, name, desc) + return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo) class NetEaseMusicMvIE(NetEaseMusicBaseIE): IE_NAME = 'netease:mv' IE_DESC = '网易云音乐 - MV' _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P[0-9]+)' - _TEST = { + _TESTS = [{ + 'url': 
'https://music.163.com/#/mv?id=10958064', + 'info_dict': { + 'id': '10958064', + 'ext': 'mp4', + 'title': '交换余生', + 'description': 'md5:e845872cff28820642a2b02eda428fea', + 'creator': '林俊杰', + 'upload_date': '20200916', + 'thumbnail': r're:http.*\.jpg', + 'duration': 364, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + }, + }, { 'url': 'http://music.163.com/#/mv?id=415350', 'info_dict': { 'id': '415350', 'ext': 'mp4', 'title': '이럴거면 그러지말지', 'description': '白雅言自作曲唱甜蜜爱情', - 'creator': '白雅言', + 'creator': '白娥娟', 'upload_date': '20150520', + 'thumbnail': r're:http.*\.jpg', + 'duration': 216, + 'view_count': int, + 'like_count': int, + 'comment_count': int, }, - 'skip': 'Blocked outside Mainland China', - } + }] def _real_extract(self, url): mv_id = self._match_id(url) info = self.query_api( - 'mv/detail?id=%s&type=mp4' % mv_id, - mv_id, 'Downloading mv info')['data'] + f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] formats = [ - {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)} + {'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)} for brs, mv_url in info['brs'].items() ] return { 'id': mv_id, - 'title': info['name'], - 'description': info.get('desc') or info.get('briefDesc'), - 'creator': info['artistName'], - 'upload_date': info['publishTime'].replace('-', ''), 'formats': formats, - 'thumbnail': info.get('cover'), - 'duration': self.convert_milliseconds(info.get('duration', 0)), + **traverse_obj(info, { + 'title': ('name', {str}), + 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), + 'creator': ('artistName', {str}), + 'upload_date': ('publishTime', {unified_strdate}), + 'thumbnail': ('cover', {url_or_none}), + 'duration': ('duration', {self.kilo_or_none}), + 'view_count': ('playCount', {int_or_none}), + 'like_count': ('likeCount', {int_or_none}), + 'comment_count': ('commentCount', {int_or_none}), + }, get_all=False), } @@ -431,75 +487,74 @@ class 
NetEaseMusicProgramIE(NetEaseMusicBaseIE): _TESTS = [{ 'url': 'http://music.163.com/#/program?id=10109055', 'info_dict': { - 'id': '10109055', + 'id': '32593346', 'ext': 'mp3', 'title': '不丹足球背后的故事', 'description': '喜马拉雅人的足球梦 ...', 'creator': '大话西藏', - 'timestamp': 1434179342, + 'timestamp': 1434179287, 'upload_date': '20150613', + 'thumbnail': r're:http.*\.jpg', 'duration': 900, }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'This program has accompanying songs.', 'url': 'http://music.163.com/#/program?id=10141022', 'info_dict': { 'id': '10141022', - 'title': '25岁,你是自在如风的少年<27°C>', + 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', + 'creator': '滚滚电台ORZ', + 'timestamp': 1434450733, + 'upload_date': '20150616', + 'thumbnail': r're:http.*\.jpg', }, 'playlist_count': 4, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'This program has accompanying songs.', 'url': 'http://music.163.com/#/program?id=10141022', 'info_dict': { - 'id': '10141022', + 'id': '32647209', 'ext': 'mp3', - 'title': '25岁,你是自在如风的少年<27°C>', + 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', - 'timestamp': 1434450841, + 'creator': '滚滚电台ORZ', + 'timestamp': 1434450733, 'upload_date': '20150616', + 'thumbnail': r're:http.*\.jpg', + 'duration': 1104, }, 'params': { 'noplaylist': True }, - 'skip': 'Blocked outside Mainland China', }] def _real_extract(self, url): program_id = self._match_id(url) info = self.query_api( - 'dj/program/detail?id=%s' % program_id, - program_id, 'Downloading program info')['program'] + f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] - name = info['name'] - description = info['description'] + metainfo = traverse_obj(info, { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'creator': ('dj', 'brand', {str}), + 'thumbnail': ('coverUrl', {url_or_none}), + 'timestamp': ('createTime', {self.kilo_or_none}), + }) if not 
self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']): formats = self.extract_formats(info['mainSong']) return { - 'id': info['mainSong']['id'], - 'title': name, - 'description': description, - 'creator': info['dj']['brand'], - 'timestamp': self.convert_milliseconds(info['createTime']), - 'thumbnail': info['coverUrl'], - 'duration': self.convert_milliseconds(info.get('duration', 0)), + 'id': str(info['mainSong']['id']), 'formats': formats, + 'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})), + **metainfo, } - song_ids = [info['mainSong']['id']] - song_ids.extend([song['id'] for song in info['songs']]) - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song_id, - 'NetEaseMusic', song_id) - for song_id in song_ids - ] - return self.playlist_result(entries, program_id, name, description) + songs = traverse_obj(info, (('mainSong', ('songs', ...)),)) + return self.playlist_result(self._get_entries(songs), program_id, **metainfo) class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): @@ -511,38 +566,32 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): 'info_dict': { 'id': '42', 'title': '声音蔓延', - 'description': 'md5:766220985cbd16fdd552f64c578a6b15' + 'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08' }, 'playlist_mincount': 40, - 'skip': 'Blocked outside Mainland China', } _PAGE_SIZE = 1000 def _real_extract(self, url): dj_id = self._match_id(url) - name = None - desc = None + metainfo = {} entries = [] for offset in itertools.count(start=0, step=self._PAGE_SIZE): info = self.query_api( - 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d' - % (self._PAGE_SIZE, dj_id, offset), - dj_id, 'Downloading dj programs - %d' % offset) + f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', + dj_id, note=f'Downloading dj programs - {offset}') - entries.extend([ - self.url_result( - 'http://music.163.com/#/program?id=%s' % program['id'], - 'NetEaseMusicProgram', 
program['id']) - for program in info['programs'] - ]) - - if name is None: - radio = info['programs'][0]['radio'] - name = radio['name'] - desc = radio['desc'] + entries.extend(self.url_result( + f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE, + program['id'], program.get('name')) for program in info['programs']) + if not metainfo: + metainfo = traverse_obj(info, ('programs', 0, 'radio', { + 'title': ('name', {str}), + 'description': ('desc', {str}), + })) if not info['more']: break - return self.playlist_result(entries, dj_id, name, desc) + return self.playlist_result(entries, dj_id, **metainfo) From a9efb4b8d74f3583450ffda0ee57259a47d39c70 Mon Sep 17 00:00:00 2001 From: xofe <22776566+xofe@users.noreply.github.com> Date: Fri, 6 Oct 2023 22:35:11 +0000 Subject: [PATCH 33/41] [ie/abc.net.au:iview] Improve `episode` extraction (#8201) Authored by: xofe --- yt_dlp/extractor/abc.py | 90 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index d2cf5f7c5..9d527246a 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -181,18 +181,102 @@ class ABCIViewIE(InfoExtractor): _GEO_COUNTRIES = ['AU'] _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00', + 'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed', + 'info_dict': { + 'id': 'CO1211V001S00', + 'ext': 'mp4', + 'title': 'Series 1 Ep 1 Wood For The Trees', + 'series': 'Utopia', + 'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00', + 'upload_date': '20230726', + 'uploader_id': 'abc1', + 'series_id': 'CO1211V', + 'episode_id': 'CO1211V001S00', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Wood For The Trees', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg', + 'timestamp': 1690403700, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name', 
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', 'info_dict': { 'id': 'LE1927H001S00', 'ext': 'mp4', - 'title': "Series 11 Ep 1", - 'series': "Gruen", + 'title': 'Series 11 Ep 1', + 'series': 'Gruen', 'description': 'md5:52cc744ad35045baf6aded2ce7287f67', 'upload_date': '20190925', 'uploader_id': 'abc1', + 'series_id': 'LE1927H', + 'episode_id': 'LE1927H001S00', + 'season_number': 11, + 'season': 'Season 11', + 'episode_number': 1, + 'episode': 'Episode 1', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg', 'timestamp': 1569445289, }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode number', + 'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00', + 'md5': '77cb7d8434440e3b28fbebe331c2456a', + 'info_dict': { + 'id': 'NC2203H039S00', + 'ext': 'mp4', + 'title': 'Series 2022 Locking Up Kids', + 'series': 'Four Corners', + 'description': 'md5:54829ca108846d1a70e1fcce2853e720', + 'upload_date': '20221114', + 'uploader_id': 'abc1', + 'series_id': 'NC2203H', + 'episode_id': 'NC2203H039S00', + 'season_number': 2022, + 'season': 'Season 2022', + 'episode_number': None, + 'episode': 'Locking Up Kids', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg', + 'timestamp': 1668460497, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name or number', + 'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00', + 'md5': '2e17dec06b13cc81dc119d2565289396', + 'info_dict': { + 'id': 'RF2004Q043S00', + 'ext': 'mp4', + 'title': 'Series 2021', + 'series': 'Landline', + 'description': 'md5:c9f30d9c0c914a7fd23842f6240be014', + 'upload_date': '20211205', + 'uploader_id': 
'abc1', + 'series_id': 'RF2004Q', + 'episode_id': 'RF2004Q043S00', + 'season_number': 2021, + 'season': 'Season 2021', + 'episode_number': None, + 'episode': None, + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg', + 'timestamp': 1638710705, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], 'params': { 'skip_download': True, }, @@ -254,6 +338,8 @@ class ABCIViewIE(InfoExtractor): 'episode_number': int_or_none(self._search_regex( r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), 'episode_id': house_number, + 'episode': self._search_regex( + r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None, 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 48cceec1ddb8649b5e771df8df79eb9c39c82b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Droz?= Date: Fri, 6 Oct 2023 19:38:26 -0300 Subject: [PATCH 34/41] [ie/lbry] Add playlist support (#8213) Closes #5982, Closes #8204 Authored by: drzraf, bashonly, Grub4K --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/lbry.py | 184 ++++++++++++++++++++------------ 2 files changed, 116 insertions(+), 69 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 908abb8ac..ef6123e8a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -951,6 +951,7 @@ from .lastfm import ( from .lbry import ( LBRYIE, LBRYChannelIE, + LBRYPlaylistIE, ) from .lci import LCIIE from .lcp import ( diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 9a9f9256f..ccce300b5 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -22,10 +22,11 @@ from ..utils import ( class LBRYBaseIE(InfoExtractor): - _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' + _BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' _CLAIM_ID_REGEX = 
r'[0-9a-f]{1,40}' - _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX + _OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX _SUPPORTED_STREAM_TYPES = ['video', 'audio'] + _PAGE_SIZE = 50 def _call_api_proxy(self, method, display_id, params, resource): headers = {'Content-Type': 'application/json-rpc'} @@ -77,10 +78,70 @@ class LBRYBaseIE(InfoExtractor): return info + def _fetch_page(self, display_id, url, params, page): + page += 1 + page_params = { + 'no_totals': True, + 'page': page, + 'page_size': self._PAGE_SIZE, + **params, + } + result = self._call_api_proxy( + 'claim_search', display_id, page_params, f'page {page}') + for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])): + yield { + **self._parse_stream(item, url), + '_type': 'url', + 'id': item['claim_id'], + 'url': self._permanent_url(url, item['name'], item['claim_id']), + } + + def _playlist_entries(self, url, display_id, claim_param, metadata): + qs = parse_qs(url) + content = qs.get('content', [None])[0] + params = { + 'fee_amount': qs.get('fee_amount', ['>=0'])[0], + 'order_by': { + 'new': ['release_time'], + 'top': ['effective_amount'], + 'trending': ['trending_group', 'trending_mixed'], + }[qs.get('order', ['new'])[0]], + 'claim_type': 'stream', + 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES, + **claim_param, + } + duration = qs.get('duration', [None])[0] + if duration: + params['duration'] = { + 'long': '>=1200', + 'short': '<=240', + }[duration] + language = qs.get('language', ['all'])[0] + if language != 'all': + languages = [language] + if language == 'en': + languages.append('none') + params['any_languages'] = languages + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, display_id, url, params), + self._PAGE_SIZE) + + return self.playlist_result( + entries, display_id, **traverse_obj(metadata, ('value', { + 'title': 'title', + 'description': 'description', + }))) + class 
LBRYIE(LBRYBaseIE): IE_NAME = 'lbry' - _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX) + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf''' + (?:\$/(?:download|embed)/)? + (?P<id> + [^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX} + |(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID} + )''' _TESTS = [{ # Video 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', @@ -149,7 +210,7 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Gardening In Canada', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', - 'formats': 'mincount:3', + 'formats': 'mincount:3', # FIXME 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', } @@ -184,12 +245,12 @@ class LBRYIE(LBRYBaseIE): 'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634', 'ext': 'mp4', 'title': 'Biotechnological Invasion of Skin (April 2023)', - 'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c', + 'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378', 'channel': 'Wicked Truths', 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', - 'timestamp': 1685790036, - 'upload_date': '20230603', + 'timestamp': 1695114347, + 'upload_date': '20230919', 'release_timestamp': 1685617473, 'release_date': '20230601', 'duration': 1063, @@ -229,10 +290,10 @@ class LBRYIE(LBRYBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - if display_id.startswith('$/'): - display_id = display_id.split('/', 2)[-1].replace('/', ':') - else: + if display_id.startswith('@'): display_id = display_id.replace(':', '#') + else: + display_id = display_id.replace('/', ':') display_id = urllib.parse.unquote(display_id) uri = 'lbry://' + display_id result = self._resolve_url(uri, display_id, 'stream') @@ -299,7
+360,7 @@ class LBRYIE(LBRYBaseIE): class LBRYChannelIE(LBRYBaseIE): IE_NAME = 'lbry:channel' - _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)' _TESTS = [{ 'url': 'https://lbry.tv/@LBRYFoundation:0', 'info_dict': { @@ -315,65 +376,50 @@ class LBRYChannelIE(LBRYBaseIE): 'url': 'lbry://@lbry#3f', 'only_matching': True, }] - _PAGE_SIZE = 50 - - def _fetch_page(self, claim_id, url, params, page): - page += 1 - page_params = { - 'channel_ids': [claim_id], - 'claim_type': 'stream', - 'no_totals': True, - 'page': page, - 'page_size': self._PAGE_SIZE, - } - page_params.update(params) - result = self._call_api_proxy( - 'claim_search', claim_id, page_params, 'page %d' % page) - for item in (result.get('items') or []): - stream_claim_name = item.get('name') - stream_claim_id = item.get('claim_id') - if not (stream_claim_name and stream_claim_id): - continue - - yield { - **self._parse_stream(item, url), - '_type': 'url', - 'id': stream_claim_id, - 'url': self._permanent_url(url, stream_claim_name, stream_claim_id), - } def _real_extract(self, url): display_id = self._match_id(url).replace(':', '#') - result = self._resolve_url( - 'lbry://' + display_id, display_id, 'channel') + result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel') claim_id = result['claim_id'] - qs = parse_qs(url) - content = qs.get('content', [None])[0] - params = { - 'fee_amount': qs.get('fee_amount', ['>=0'])[0], - 'order_by': { - 'new': ['release_time'], - 'top': ['effective_amount'], - 'trending': ['trending_group', 'trending_mixed'], - }[qs.get('order', ['new'])[0]], - 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES, - } - duration = qs.get('duration', [None])[0] - if duration: - params['duration'] = { - 'long': '>=1200', - 'short': '<=240', - }[duration] - language = qs.get('language', ['all'])[0] - if
language != 'all': - languages = [language] - if language == 'en': - languages.append('none') - params['any_languages'] = languages - entries = OnDemandPagedList( - functools.partial(self._fetch_page, claim_id, url, params), - self._PAGE_SIZE) - result_value = result.get('value') or {} - return self.playlist_result( - entries, claim_id, result_value.get('title'), - result_value.get('description')) + + return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result) + + +class LBRYPlaylistIE(LBRYBaseIE): + IE_NAME = 'lbry:playlist' + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)' + _TESTS = [{ + 'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2', + 'info_dict': { + 'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2', + 'title': 'Théâtre Classique', + 'description': 'Théâtre Classique', + }, + 'playlist_mincount': 4, + }, { + 'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770', + 'info_dict': { + 'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770', + 'title': 'Social Media Exposed', + 'description': 'md5:98af97317aacd5b85d595775ea37d80e', + }, + 'playlist_mincount': 34, + }, { + 'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184', + 'info_dict': { + 'id': '938fb11d-215f-4d1c-ad64-723954df2184', + }, + 'playlist_mincount': 1000, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + result = traverse_obj(self._call_api_proxy('claim_search', display_id, { + 'claim_ids': [display_id], + 'no_totals': True, + 'page': 1, + 'page_size': self._PAGE_SIZE, + }, 'playlist'), ('items', 0)) + claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))} + + return self._playlist_entries(url, display_id, claim_param, result) From fbcc299bd8a19cf8b3c8805d6c268a9110230973 Mon Sep 17 00:00:00 2001 From: Umar Getagazov Date: Sat, 7 Oct 2023 01:45:46 +0300 Subject: [PATCH 35/41] [ie/substack] Fix embed extraction (#8218)
Authored by: handlerug --- yt_dlp/extractor/substack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 3782ceed1..5835a5a8d 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -50,7 +50,7 @@ class SubstackIE(InfoExtractor): if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage): return - mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage) + mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)', webpage) if mobj: parsed = urllib.parse.urlparse(url) yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() From 2f2dda3a7e85148773da3cdbc03ac9949ec1bc45 Mon Sep 17 00:00:00 2001 From: Umar Getagazov Date: Sat, 7 Oct 2023 01:48:54 +0300 Subject: [PATCH 36/41] [ie/substack] Fix download cookies bug (#8219) Authored by: handlerug --- yt_dlp/extractor/substack.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 5835a5a8d..6ee3f75e1 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -56,10 +56,10 @@ class SubstackIE(InfoExtractor): yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() raise cls.StopExtraction() - def _extract_video_formats(self, video_id, username): + def _extract_video_formats(self, video_id, url): formats, subtitles = [], {} for video_format in ('hls', 'mp4'): - video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}' + video_url = urllib.parse.urljoin(url, f'/api/v1/video/upload/{video_id}/src?type={video_format}') if video_format == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False) @@ -81,12 +81,17 @@ class SubstackIE(InfoExtractor): r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id) + canonical_url = url + domain = traverse_obj(webpage_info, ('domainInfo', 'customDomain', {str})) + if domain: + canonical_url = urllib.parse.urlparse(url)._replace(netloc=domain).geturl() + post_type = webpage_info['post']['type'] formats, subtitles = [], {} if post_type == 'podcast': formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} elif post_type == 'video': - formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username) + formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) else: self.raise_no_formats(f'Page type "{post_type}" is not supported') @@ -99,4 +104,5 @@ class SubstackIE(InfoExtractor): 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')), 'uploader': traverse_obj(webpage_info, ('pub', 'name')), 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))), + 'webpage_url': canonical_url, } From 2ad3873f0dfa9285c91d2160e36c039e69d597c7 Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 6 Oct 2023 23:53:11 +0100 Subject: [PATCH 37/41] [ie/radiko] Improve extraction (#8221) Authored by: garret1317 --- yt_dlp/extractor/radiko.py | 67 ++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index cef68eba0..8c8fb1a8f 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -1,4 +1,5 @@ import base64 +import random import urllib.parse from .common import InfoExtractor @@ -13,6 +14,7 @@ from ..utils import ( class RadikoBaseIE(InfoExtractor): + _GEO_BYPASS = False _FULL_KEY = None _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = ( 'https://c-rpaa.smartstream.ne.jp', @@ -32,7 +34,7 @@ class RadikoBaseIE(InfoExtractor): 'https://c-radiko.smartstream.ne.jp', ) - def _auth_client(self): + def _negotiate_token(self): _, 
auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ @@ -58,10 +60,23 @@ class RadikoBaseIE(InfoExtractor): 'x-radiko-partialkey': partial_key, }).split(',')[0] + if area_id == 'OUT': + self.raise_geo_restricted(countries=['JP']) + auth_data = (auth_token, area_id) self.cache.store('radiko', 'auth_data', auth_data) return auth_data + def _auth_client(self): + cachedata = self.cache.load('radiko', 'auth_data') + if cachedata is not None: + response = self._download_webpage( + 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401, + headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]}) + if response == 'OK': + return cachedata + return self._negotiate_token() + def _extract_full_key(self): if self._FULL_KEY: return self._FULL_KEY @@ -75,7 +90,7 @@ class RadikoBaseIE(InfoExtractor): if full_key: full_key = full_key.encode() - else: # use full key ever known + else: # use only full key ever known full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' self._FULL_KEY = full_key @@ -103,24 +118,24 @@ class RadikoBaseIE(InfoExtractor): m3u8_playlist_data = self._download_xml( f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, note='Downloading stream information') - m3u8_urls = m3u8_playlist_data.findall('.//url') formats = [] found = set() - for url_tag in m3u8_urls: - pcu = url_tag.find('playlist_create_url').text - url_attrib = url_tag.attrib + + timefree_int = 0 if is_onair else 1 + + for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'): + pcu = element.text + if pcu in found: + continue + found.add(pcu) playlist_url = update_url_query(pcu, { 'station_id': station, **query, 'l': '15', - 'lsid': '88ecea37e968c1f17d5413312d9f8003', + 'lsid': ''.join(random.choices('0123456789abcdef', k=32)), 'type': 'b', }) - if playlist_url in found: - continue - else: - 
found.add(playlist_url) time_to_skip = None if is_onair else cursor - ft @@ -138,7 +153,7 @@ class RadikoBaseIE(InfoExtractor): not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)): sf['preference'] = -100 sf['format_note'] = 'not preferred' - if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: + if not is_onair and timefree_int == 1 and time_to_skip: sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]} formats.extend(subformats) @@ -166,21 +181,7 @@ class RadikoIE(RadikoBaseIE): vid_int = unified_timestamp(video_id, False) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - auth_cache = self.cache.load('radiko', 'auth_data') - for attempt in range(2): - auth_token, area_id = (not attempt and auth_cache) or self._auth_client() - formats = self._extract_formats( - video_id=video_id, station=station, is_onair=False, - ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, - query={ - 'start_at': radio_begin, - 'ft': radio_begin, - 'end_at': radio_end, - 'to': radio_end, - 'seek': video_id, - }) - if formats: - break + auth_token, area_id = self._auth_client() return { 'id': video_id, @@ -189,8 +190,18 @@ class RadikoIE(RadikoBaseIE): 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, - 'formats': formats, 'is_live': True, + 'formats': self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id + } + ), } From 35d9cbaf9638ccc9daf8a863063b2e7c135bc664 Mon Sep 17 00:00:00 2001 From: AS6939 <46506352+AS6939@users.noreply.github.com> Date: Sat, 7 Oct 2023 06:56:12 +0800 Subject: [PATCH 38/41] [ie/iq.com] Fix extraction and subtitles (#8260) Closes #7734, Closes #8123 Authored by: AS6939 --- 
yt_dlp/extractor/iqiyi.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index fa602ba88..3368ab1d9 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -499,9 +499,10 @@ class IqIE(InfoExtractor): 'tm': tm, 'qdy': 'a', 'qds': 0, - 'k_ft1': 141287244169348, - 'k_ft4': 34359746564, - 'k_ft5': 1, + 'k_ft1': '143486267424900', + 'k_ft4': '1572868', + 'k_ft7': '4', + 'k_ft5': '1', 'bop': JSON.stringify({ 'version': '10.0', 'dfp': dfp @@ -529,14 +530,22 @@ class IqIE(InfoExtractor): webpack_js_url = self._proto_relative_url(self._search_regex( r'') + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'), + 'age_limit': 18, + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'release_timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {str_or_none}), + 'uploader_url': ('user', 'permalink_url', {url_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'view_count': ('plays', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'webpage_url': ('permalink_url', {url_or_none}), + }), + } From 0e722f2f3ca42e634fd7b06ee70b16bf833ce132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Droz?= Date: Fri, 6 Oct 2023 19:59:42 -0300 Subject: [PATCH 40/41] [ie/lbry] Extract `uploader_id` (#8244) Closes #123 Authored by: drzraf --- yt_dlp/extractor/lbry.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index ccce300b5..cc37c41e8 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -70,11 +70,11 @@ class LBRYBaseIE(InfoExtractor): 'duration': 
('value', stream_type, 'duration', {int_or_none}), 'channel': ('signing_channel', 'value', 'title', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}), + 'uploader_id': ('signing_channel', 'name', {str}), }) - channel_name = traverse_obj(stream, ('signing_channel', 'name', {str})) - if channel_name and info.get('channel_id'): - info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id']) + if info.get('uploader_id') and info.get('channel_id'): + info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id']) return info @@ -159,6 +159,7 @@ class LBRYIE(LBRYBaseIE): 'height': 720, 'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png', 'license': 'None', + 'uploader_id': '@Mantega', 'duration': 346, 'channel': 'LBRY/Odysee rats united!!!', 'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627', @@ -192,6 +193,7 @@ class LBRYIE(LBRYBaseIE): 'vcodec': 'none', 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png', 'license': 'None', + 'uploader_id': '@LBRYFoundation', } }, { 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e', @@ -210,7 +212,8 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Gardening In Canada', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', - 'formats': 'mincount:3', # FIXME + 'uploader_id': '@gardeningincanada', + 'formats': 'mincount:3', 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', } @@ -235,6 +238,7 @@ class LBRYIE(LBRYBaseIE): 'formats': 'mincount:1', 'thumbnail': 'startswith:https://thumb', 'license': 'None', + 'uploader_id': '@RT', }, 'params': {'skip_download': True} }, { @@ -249,6 +253,7 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Wicked Truths', 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', + 
'uploader_id': '@wickedtruths', 'timestamp': 1695114347, 'upload_date': '20230919', 'release_timestamp': 1685617473, From e831c80e8b2fc025b3b67d82974cc59e3526fdc8 Mon Sep 17 00:00:00 2001 From: garret Date: Sat, 7 Oct 2023 00:05:48 +0100 Subject: [PATCH 41/41] [ie/nhk] Fix VOD extraction (#8249) Closes #8242 Authored by: garret1317 --- yt_dlp/extractor/nhk.py | 46 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index fbd6a18f6..bcbc2279f 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -28,6 +28,44 @@ class NhkBaseIE(InfoExtractor): m_id, lang, '/all' if is_video else ''), m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] + def _get_api_info(self, refresh=True): + if not refresh: + return self.cache.load('nhk', 'api_info') + + self.cache.store('nhk', 'api_info', {}) + movie_player_js = self._download_webpage( + 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None, + note='Downloading stream API information') + api_info = { + 'url': self._search_regex( + r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'), + 'token': self._search_regex( + r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'), + } + self.cache.store('nhk', 'api_info', api_info) + return api_info + + def _extract_formats_and_subtitles(self, vod_id): + for refresh in (False, True): + api_info = self._get_api_info(refresh) + if not api_info: + continue + + api_url = api_info.pop('url') + stream_url = traverse_obj( + self._download_json( + api_url, vod_id, 'Downloading stream url info', fatal=False, query={ + **api_info, + 'type': 'json', + 'optional_id': vod_id, + 'active_flg': 1, + }), + ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) + if stream_url: + return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) 
+ + raise ExtractorError('Unable to extract stream url') + def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups() @@ -67,12 +105,14 @@ class NhkBaseIE(InfoExtractor): } if is_video: vod_id = episode['vod_id'] + formats, subs = self._extract_formats_and_subtitles(vod_id) + info.update({ - '_type': 'url_transparent', - 'ie_key': 'Piksel', - 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, 'id': vod_id, + 'formats': formats, + 'subtitles': subs, }) + else: if fetch_episode: audio_path = episode['audio']['audio']