From 5310fa87f6cb7f66bf42e2520878952fbf6b1652 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:29:22 -0500 Subject: [PATCH 01/44] [ie/weverse] Fix extractor (#11215) Closes #11213 Authored by: bashonly --- yt_dlp/extractor/weverse.py | 60 +++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/weverse.py b/yt_dlp/extractor/weverse.py index c94ca9db97..6f1a8b95d8 100644 --- a/yt_dlp/extractor/weverse.py +++ b/yt_dlp/extractor/weverse.py @@ -27,8 +27,9 @@ class WeverseBaseIE(InfoExtractor): _NETRC_MACHINE = 'weverse' - _ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2' + _ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api' _API_HEADERS = { + 'Accept': 'application/json', 'Referer': 'https://weverse.io/', 'WEV-device-Id': str(uuid.uuid4()), } @@ -39,14 +40,14 @@ def _perform_login(self, username, password): headers = { 'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a', - 'x-acc-app-version': '2.2.6', + 'x-acc-app-version': '3.3.6', 'x-acc-language': 'en', 'x-acc-service-id': 'weverse', 'x-acc-trace-id': str(uuid.uuid4()), 'x-clog-user-device-id': str(uuid.uuid4()), } valid_username = traverse_obj(self._download_json( - f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username', + f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username', query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword') if not valid_username: raise ExtractorError('Invalid username provided', expected=True) @@ -54,8 +55,9 @@ def _perform_login(self, username, password): headers['content-type'] = 'application/json' try: auth = self._download_json( - f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({ + f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({ 'email': username, + 'otpSessionId': 'BY_PASS', 'password': 
password, }, separators=(',', ':')).encode(), headers=headers, note='Logging in') except ExtractorError as e: @@ -78,8 +80,10 @@ def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'): # From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js: key = b'1b9cb6378d959b45714bec49971ade22e6e24e42' api_path = update_url_query(ep, { + # 'gcc': 'US', 'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4', 'language': 'en', + 'os': 'WEB', 'platform': 'WEB', 'wpf': 'pc', }) @@ -152,7 +156,7 @@ def _parse_post_meta(self, metadata): 'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}), 'uploader': ('author', 'profileName', {str}), 'uploader_id': ('author', 'memberId', {str}), - 'creator': ('community', 'communityName', {str}), + 'creators': ('community', 'communityName', {str}, all), 'channel_id': (('community', 'author'), 'communityId', {str_or_none}), 'duration': ('extension', 'video', 'playTime', {float_or_none}), 'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}), @@ -196,7 +200,7 @@ class WeverseIE(WeverseBaseIE): 'channel': 'billlie', 'channel_id': '72', 'channel_url': 'https://weverse.io/billlie', - 'creator': 'Billlie', + 'creators': ['Billlie'], 'timestamp': 1666262062, 'upload_date': '20221020', 'release_timestamp': 1666262058, @@ -222,7 +226,7 @@ class WeverseIE(WeverseBaseIE): 'channel': 'lesserafim', 'channel_id': '47', 'channel_url': 'https://weverse.io/lesserafim', - 'creator': 'LE SSERAFIM', + 'creators': ['LE SSERAFIM'], 'timestamp': 1659353400, 'upload_date': '20220801', 'release_timestamp': 1659353400, @@ -286,7 +290,7 @@ def _real_extract(self, url): elif live_status == 'is_live': video_info = self._call_api( - f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2', + f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2', video_id, note='Downloading live JSON') playback = self._parse_json(video_info['lipPlayback'], video_id) 
m3u8_url = traverse_obj(playback, ( @@ -302,7 +306,7 @@ def _real_extract(self, url): else: infra_video_id = post['extension']['video']['infraVideoId'] in_key = self._call_api( - f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id, + f'/video/v1.1/vod/{api_video_id}/inKey?preview=false', video_id, data=b'{}', note='Downloading VOD API key')['inKey'] video_info = self._download_json( @@ -347,7 +351,6 @@ class WeverseMediaIE(WeverseBaseIE): _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)' _TESTS = [{ 'url': 'https://weverse.io/billlie/media/4-116372884', - 'md5': '8efc9cfd61b2f25209eb1a5326314d28', 'info_dict': { 'id': 'e-C9wLSQs6o', 'ext': 'mp4', @@ -358,8 +361,9 @@ class WeverseMediaIE(WeverseBaseIE): 'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg', 'uploader': 'Billlie', 'uploader_id': '@Billlie', - 'uploader_url': 'http://www.youtube.com/@Billlie', + 'uploader_url': 'https://www.youtube.com/@Billlie', 'upload_date': '20230403', + 'timestamp': 1680533992, 'duration': 211, 'age_limit': 0, 'playable_in_embed': True, @@ -372,6 +376,8 @@ class WeverseMediaIE(WeverseBaseIE): 'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg', 'categories': ['Entertainment'], 'tags': 'count:7', + 'channel_is_verified': True, + 'heatmap': 'count:100', }, }, { 'url': 'https://weverse.io/billlie/media/3-102914520', @@ -386,7 +392,7 @@ class WeverseMediaIE(WeverseBaseIE): 'channel': 'billlie', 'channel_id': '72', 'channel_url': 'https://weverse.io/billlie', - 'creator': 'Billlie', + 'creators': ['Billlie'], 'timestamp': 1662174000, 'upload_date': '20220903', 'release_timestamp': 1662174000, @@ -432,7 +438,7 @@ class WeverseMomentIE(WeverseBaseIE): 'uploader_id': '66a07e164b56a696ee71c99315ffe27b', 'channel': 'secretnumber', 'channel_id': '56', - 'creator': 'SECRET NUMBER', + 'creators': ['SECRET NUMBER'], 'duration': 10, 'upload_date': '20230405', 'timestamp': 1680653968, @@ -441,7 +447,6 @@ class 
WeverseMomentIE(WeverseBaseIE): 'comment_count': int, 'availability': 'needs_auth', }, - 'skip': 'Moment has expired', }] def _real_extract(self, url): @@ -571,7 +576,7 @@ class WeverseLiveIE(WeverseBaseIE): 'channel': 'purplekiss', 'channel_id': '35', 'channel_url': 'https://weverse.io/purplekiss', - 'creator': 'PURPLE KISS', + 'creators': ['PURPLE KISS'], 'timestamp': 1680780892, 'upload_date': '20230406', 'release_timestamp': 1680780883, @@ -584,6 +589,31 @@ class WeverseLiveIE(WeverseBaseIE): 'live_status': 'is_live', }, 'skip': 'Livestream has ended', + }, { + 'url': 'https://weverse.io/lesserafim', + 'info_dict': { + 'id': '4-181521628', + 'ext': 'mp4', + 'title': r're:심심해서요', + 'description': '', + 'uploader': '채채🤎', + 'uploader_id': 'd49b8b06f3cc1d92d655b25ab27ac2e7', + 'channel': 'lesserafim', + 'channel_id': '47', + 'creators': ['LE SSERAFIM'], + 'channel_url': 'https://weverse.io/lesserafim', + 'timestamp': 1728570273, + 'upload_date': '20241010', + 'release_timestamp': 1728570264, + 'release_date': '20241010', + 'thumbnail': r're:https://phinf\.wevpstatic\.net/.+\.png', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'availability': 'needs_auth', + 'live_status': 'is_live', + }, + 'skip': 'Livestream has ended', }, { 'url': 'https://weverse.io/billlie/', 'only_matching': True, From ceaea731b6e314dbbdfb2e358d7677785ed0b4fc Mon Sep 17 00:00:00 2001 From: vvto33 <54504675+vvto33@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:42:34 +0900 Subject: [PATCH 02/44] [ie/TVer] Support series URLs (#9507) Also improve thumbnails extraction Authored by: vvto33, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> --- yt_dlp/extractor/tver.py | 148 ++++++++++++++++++++++++++------------- 1 file changed, 101 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index c13832c6f5..a8865fe649 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -6,11 +6,12 @@ 
str_or_none, strip_or_none, traverse_obj, + update_url_query, ) class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', 'url': 'https://tver.jp/episodes/ep83nf3w4p', @@ -21,80 +22,115 @@ class TVerIE(InfoExtractor): 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', 'channel': 'テレビ朝日', + 'id': 'ep83nf3w4p', + 'ext': 'mp4', + 'onair_label': '5月3日(火)放送分', + 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分', }, 'add_ie': ['BrightcoveNew'], - }, { - 'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/', - 'info_dict': { - 'id': '6359578055112', - 'ext': 'mp4', - 'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」', - 'timestamp': 1722279928, - 'upload_date': '20240729', - 'tags': ['20240729', 'japanese', 'japanmedal', 'paris'], - 'uploader_id': '4774017240001', - 'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg', - 'duration': 670.571, - }, - 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { 'url': 'https://tver.jp/lp/f0033031', 'only_matching': True, + }, { + 'url': 'https://tver.jp/series/srtxft431v', + 'info_dict': { + 'id': 'srtxft431v', + 'title': '名探偵コナン', + }, + 'playlist': [ + { + 'md5': '779ffd97493ed59b0a6277ea726b389e', + 'info_dict': { + 'id': 'ref:conan-1137-241005', + 'ext': 'mp4', + 'title': '名探偵コナン #1137「行列店、味変の秘密」', + 'uploader_id': '5330942432001', + 'tags': [], + 'channel': '読売テレビ', + 'series': '名探偵コナン', + 'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5', + 'episode': '#1137「行列店、味変の秘密」', + 'duration': 1469.077, + 'timestamp': 1728030405, + 'upload_date': '20241004', + 'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 
読売テレビ 10月5日(土)放送分', + 'thumbnail': r're:https://.+\.jpg', + }, + }], + }, { + 'url': 'https://tver.jp/series/sru35hwdd2', + 'info_dict': { + 'id': 'sru35hwdd2', + 'title': '神回だけ見せます!', + }, + 'playlist_count': 11, + }, { + 'url': 'https://tver.jp/series/srkq2shp9d', + 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' - _PLATFORM_UID = None - _PLATFORM_TOKEN = None + _HEADERS = {'x-tver-platform-type': 'web'} + _PLATFORM_QUERY = {} def _real_initialize(self): - create_response = self._download_json( - 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None, - note='Creating session', data=b'device_type=pc', headers={ - 'Origin': 'https://s.tver.jp', - 'Referer': 'https://s.tver.jp/', - 'Content-Type': 'application/x-www-form-urlencoded', + session_info = self._download_json( + 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', + None, 'Creating session', data=b'device_type=pc') + self._PLATFORM_QUERY = traverse_obj(session_info, ('result', { + 'platform_uid': 'platform_uid', + 'platform_token': 'platform_token', + })) + + def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None): + return self._download_json( + f'https://platform-api.tver.jp/service/api/{path}', video_id, note, + fatal=fatal, headers=self._HEADERS, query={ + **self._PLATFORM_QUERY, + **(query or {}), }) - self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid')) - self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token')) + + def _yield_episode_ids_for_series(self, series_id): + seasons_info = self._download_json( + f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}', + series_id, 'Downloading seasons info', headers=self._HEADERS) + for season_id in traverse_obj( + seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})): + episodes_info = self._call_platform_api( + 
f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info') + yield from traverse_obj(episodes_info, ( + 'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str})) def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - if video_type == 'olympic/paris2024/video': - # Player ID is taken from .content.brightcove.E200.pro.pc.account_id: - # https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d= - return self.url_result(smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id), - {'geo_countries': ['JP']}), 'BrightcoveNew') + if video_type == 'series': + series_info = self._call_platform_api( + f'v2/callSeries/{video_id}', video_id, 'Downloading series info') + return self.playlist_from_matches( + self._yield_episode_ids_for_series(video_id), video_id, + traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})), + ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}') - elif video_type not in {'series', 'episodes'}: + if video_type != 'episodes': webpage = self._download_webpage(url, video_id, note='Resolving to new URL') video_id = self._match_id(self._search_regex( (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), webpage, 'url regex')) - episode_info = self._download_json( - f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', - video_id, fatal=False, - query={ - 'platform_uid': self._PLATFORM_UID, - 'platform_token': self._PLATFORM_TOKEN, - }, headers={ - 'x-tver-platform-type': 'web', + episode_info = self._call_platform_api( + f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={ + 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', }) episode_content = 
traverse_obj( episode_info, ('result', 'episode', 'content')) or {} + version = traverse_obj(episode_content, ('version', {str_or_none}), default='5') video_info = self._download_json( - f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, - query={ - 'v': str_or_none(episode_content.get('version')) or '5', - }, headers={ - 'Origin': 'https://tver.jp', - 'Referer': 'https://tver.jp/', - }) + f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info', + query={'v': version}, headers={'Referer': 'https://tver.jp/'}) p_id = video_info['video']['accountID'] r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False) if not r_id: @@ -110,6 +146,23 @@ def _real_extract(self, url): provider = str_or_none(episode_content.get('productionProviderName')) onair_label = str_or_none(episode_content.get('broadcastDateLabel')) + thumbnails = [ + { + 'id': quality, + 'url': update_url_query( + f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg', + {'v': version}), + 'width': width, + 'height': height, + } + for quality, width, height in [ + ('small', 480, 270), + ('medium', 640, 360), + ('large', 960, 540), + ('xlarge', 1280, 720), + ] + ] + return { '_type': 'url_transparent', 'title': title, @@ -119,6 +172,7 @@ def _real_extract(self, url): 'alt_title': join_nonempty(title, provider, onair_label, delim=' '), 'channel': provider, 'description': str_or_none(video_info.get('description')), + 'thumbnails': thumbnails, 'url': smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), 'ie_key': 'BrightcoveNew', From f4338714241b11d9d43768ae71a25f5e952f677d Mon Sep 17 00:00:00 2001 From: 444995 <113297002+444995@users.noreply.github.com> Date: Sat, 12 Oct 2024 00:39:00 +0200 Subject: [PATCH 03/44] [ie/drtv] Fix extractor (#11141) Closes #11137 Authored by: 444995 --- yt_dlp/extractor/drtv.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) 
diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 32b6845528..ba86eb2b49 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -139,12 +139,11 @@ def _real_initialize(self): return token_response = self._download_json( - 'https://production.dr-massive.com/api/authorization/anonymous-sso', None, + 'https://isl.dr-massive.com/api/authorization/anonymous-sso', None, note='Downloading anonymous token', headers={ 'content-type': 'application/json', }, query={ - 'device': 'web_browser', - 'ff': 'idp,ldp,rpt', + 'device': 'phone_android', 'lang': 'da', 'supportFallbackToken': 'true', }, data=json.dumps({ From 9d43dcb2c5c38f443f84dfc126cd32720e1a1ad6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Oct 2024 17:47:37 +0000 Subject: [PATCH 04/44] [ie/cwtv] Fix extraction (#11230) Closes #9935 Authored by: bashonly --- yt_dlp/extractor/cwtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 4559d3cd68..cb432e6160 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -6,6 +6,7 @@ parse_iso8601, smuggle_url, str_or_none, + update_url_query, ) @@ -98,7 +99,9 @@ def _real_extract(self, url): raise ExtractorError(data['msg'], expected=True) video_data = data['video'] title = video_data['title'] - mpx_url = video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U' + mpx_url = update_url_query( + video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}', + {'formats': 'M3U+none'}) season = str_or_none(video_data.get('season')) episode = str_or_none(video_data.get('episode')) From babb70960595e2146f06f81affc29c7e713e34e2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Oct 2024 23:23:03 +0000 Subject: [PATCH 05/44] [ie/patreon:campaign] Stricter URL 
matching (#11235) Redefinition of suitable() is no longer necessary Closes #11233 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index f5cb2a5d65..4d668cd37d 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -55,6 +55,7 @@ def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None class PatreonIE(PatreonBaseIE): + IE_NAME = 'patreon' _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.patreon.com/creation?hid=743933', @@ -433,8 +434,12 @@ def _get_comments(self, post_id): class PatreonCampaignIE(PatreonBaseIE): - - _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))' + IE_NAME = 'patreon:campaign' + _VALID_URL = r'''(?x) + https?://(?:www\.)?patreon\.com/(?: + (?:m|api/campaigns)/(?P<campaign_id>\d+)| + (?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+) + )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', 'info_dict': { @@ -496,10 +501,6 @@ class PatreonCampaignIE(PatreonBaseIE): 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return False if PatreonIE.suitable(url) else super().suitable(url) - def _entries(self, campaign_id): cursor = None params = { From c5f0f58efd8c3930de8202c15a5c53b1b635bd51 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 03:38:09 +0200 Subject: [PATCH 06/44] [cookies] Fix compatibility for Python <=3.9 in traceback Authored by: Grub4K --- yt_dlp/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4f45d7faf6..9ac6ca0d0c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -4120,7 +4120,8 @@ def cookiejar(self): self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) except 
CookieLoadError as error: cause = error.__context__ - self.report_error(str(cause), tb=''.join(traceback.format_exception(cause))) + # compat: <=py3.9: `traceback.format_exception` has a different signature + self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__))) raise @property From edfd095b1917701c5046bd51f9542897c17d41a7 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 03:42:43 +0200 Subject: [PATCH 07/44] [ie/generic] Impersonate browser by default (#11206) Also adds `impersonate` extractor arg Authored by: Grub4K --- README.md | 1 + yt_dlp/extractor/generic.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fbf50072db..4b1ada82ed 100644 --- a/README.md +++ b/README.md @@ -1795,6 +1795,7 @@ #### generic * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` +* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation #### funimation * `language`: Audio languages to extract, e.g. 
`funimation:language=english,japanese` diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 592800287a..9b5421e41d 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -8,6 +8,7 @@ from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring +from ..networking.impersonate import ImpersonateTarget from ..utils import ( KNOWN_EXTENSIONS, MEDIA_EXTENSIONS, @@ -2373,6 +2374,12 @@ def _real_extract(self, url): else: video_id = self._generic_id(url) + # Try to impersonate a web-browser by default if possible + # Skip impersonation if not available to omit the warning + impersonate = self._configuration_arg('impersonate', ['']) + if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()): + impersonate = None + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # making it impossible to download only chunk of the file (yet we need only 512kB to # test whether it's HTML or not). 
According to yt-dlp default Accept-Encoding @@ -2384,7 +2391,7 @@ def _real_extract(self, url): full_response = self._request_webpage(url, video_id, headers=filter_dict({ 'Accept-Encoding': 'identity', 'Referer': smuggled_data.get('referer'), - })) + }), impersonate=impersonate) new_url = full_response.url if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) From 1a830394a21a81a3e9918f9e175abc9fbb21f089 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 03:50:31 +0200 Subject: [PATCH 08/44] [build] `make_lazy_extractors`: Force running without plugins (#11205) Authored by: Grub4K --- README.md | 3 ++- devscripts/make_lazy_extractors.py | 27 ++++----------------------- yt_dlp/YoutubeDL.py | 4 ++++ yt_dlp/plugins.py | 3 +++ 4 files changed, 13 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 4b1ada82ed..1cafe51d51 100644 --- a/README.md +++ b/README.md @@ -278,7 +278,7 @@ ### Related scripts * **`devscripts/update-version.py`** - Update the version number based on the current date. * **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. -* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. +* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading. Note: See their `--help` for more info. 
@@ -1898,6 +1898,7 @@ # PLUGINS myplugin.py yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. +Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index d74ea202f0..d288d84296 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -2,7 +2,6 @@ # Allow direct execution import os -import shutil import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -34,18 +33,14 @@ class {name}({bases}): def main(): + os.environ['YTDLP_NO_PLUGINS'] = 'true' + os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true' + lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py') - if os.path.exists(lazy_extractors_filename): - os.remove(lazy_extractors_filename) - _ALL_CLASSES = get_all_ies() # Must be before import - - import yt_dlp.plugins + from yt_dlp.extractor.extractors import _ALL_CLASSES from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor - # Filter out plugins - _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] - DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, @@ -58,20 +53,6 @@ def main(): write_file(lazy_extractors_filename, f'{module_src}\n') -def get_all_ies(): - PLUGINS_DIRNAME = 'ytdlp_plugins' - BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked' - if os.path.exists(PLUGINS_DIRNAME): - # os.rename cannot be used, e.g. in Docker. 
See https://github.com/yt-dlp/yt-dlp/pull/4958 - shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME) - try: - from yt_dlp.extractor.extractors import _ALL_CLASSES - finally: - if os.path.exists(BLOCKED_DIRNAME): - shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME) - return _ALL_CLASSES - - def extra_ie_code(ie, base=None): for var in STATIC_CLASS_PROPERTIES: val = getattr(ie, var) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9ac6ca0d0c..eea1065036 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -4070,6 +4070,10 @@ def get_encoding(stream): write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') + if os.environ.get('YTDLP_NO_PLUGINS'): + write_debug('Plugins are forcibly disabled') + return + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index 3cc879fd7e..d777d14e71 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -5,6 +5,7 @@ import importlib.util import inspect import itertools +import os import pkgutil import sys import traceback @@ -137,6 +138,8 @@ def load_module(module, module_name, suffix): def load_plugins(name, suffix): classes = {} + if os.environ.get('YTDLP_NO_PLUGINS'): + return classes for finder, module_name, _ in iter_modules(name): if any(x.startswith('_') for x in module_name.split('.')): From 16eb28026a2ddf5608d0a628ef15949b8d3805a9 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 04:01:26 +0200 Subject: [PATCH 09/44] [test] Allow running tests explicitly (#11203) Authored by: Grub4K --- devscripts/run_tests.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index c605aa62cf..eb614fe591 100755 --- a/devscripts/run_tests.py +++ 
b/devscripts/run_tests.py @@ -16,7 +16,7 @@ def parse_args(): parser = argparse.ArgumentParser(description='Run selected yt-dlp tests') parser.add_argument( - 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') + 'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*') parser.add_argument( '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION') parser.add_argument( @@ -27,7 +27,6 @@ def parse_args(): def run_tests(*tests, pattern=None, ci=False): run_core = 'core' in tests or (not pattern and not tests) run_download = 'download' in tests - tests = list(map(fix_test_name, tests)) pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)] @@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False): arguments.extend(['-m', 'download']) else: arguments.extend( - f'test/test_download.py::TestDownload::test_{test}' for test in tests) + test if '/' in test + else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}' + for test in tests) print(f'Running {arguments}', flush=True) try: From 85b87c991af25dcb35630fa94580fd418e78ee33 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 04:10:12 +0200 Subject: [PATCH 10/44] [utils] `sanitize_path`: Reimplement function (#11198) Authored by: Grub4K --- test/test_utils.py | 10 +++++-- yt_dlp/utils/_utils.py | 64 +++++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4f5fa1e100..d4b846f56f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -221,9 +221,10 @@ def test_sanitize_ids(self): self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') def test_sanitize_path(self): - if sys.platform != 'win32': - return + with unittest.mock.patch('sys.platform', 'win32'): + self._test_sanitize_path() + def 
_test_sanitize_path(self): self.assertEqual(sanitize_path('abc'), 'abc') self.assertEqual(sanitize_path('abc/def'), 'abc\\def') self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') @@ -256,6 +257,11 @@ def test_sanitize_path(self): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + self.assertEqual(sanitize_path('\\abc'), '\\abc') + self.assertEqual(sanitize_path('C:abc'), 'C:abc') + self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..') + self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s') + def test_sanitize_url(self): self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index e1b3c48d63..967f01fdf9 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -664,31 +664,51 @@ def replace_insane(char): return result +def _sanitize_path_parts(parts): + sanitized_parts = [] + for part in parts: + if not part or part == '.': + continue + elif part == '..': + if sanitized_parts and sanitized_parts[-1] != '..': + sanitized_parts.pop() + sanitized_parts.append('..') + continue + # Replace invalid segments with `#` + # - trailing dots and spaces (`asdf...` => `asdf..#`) + # - invalid chars (`<>` => `##`) + sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part) + sanitized_parts.append(sanitized_part) + + return sanitized_parts + + def sanitize_path(s, force=False): """Sanitizes and normalizes path on Windows""" - # XXX: this handles drive relative paths (c:sth) incorrectly - if sys.platform == 'win32': - force = False - drive_or_unc, _ = os.path.splitdrive(s) - elif force: - drive_or_unc = '' - else: - return s + if sys.platform != 'win32': + if not force: + return s + root = '/' if s.startswith('/') else '' + return root + '/'.join(_sanitize_path_parts(s.split('/'))) - norm_path = os.path.normpath(remove_start(s, 
drive_or_unc)).split(os.path.sep) - if drive_or_unc: - norm_path.pop(0) - sanitized_path = [ - path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) - for path_part in norm_path] - if drive_or_unc: - sanitized_path.insert(0, drive_or_unc + os.path.sep) - elif force and s and s[0] == os.path.sep: - sanitized_path.insert(0, os.path.sep) - # TODO: Fix behavioral differences <3.12 - # The workaround using `normpath` only superficially passes tests - # Ref: https://github.com/python/cpython/pull/100351 - return os.path.normpath(os.path.join(*sanitized_path)) + normed = s.replace('/', '\\') + + if normed.startswith('\\\\'): + # UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`) + parts = normed.split('\\') + root = '\\'.join(parts[:4]) + '\\' + parts = parts[4:] + elif normed[1:2] == ':': + # absolute path or drive relative path + offset = 3 if normed[2:3] == '\\' else 2 + root = normed[:offset] + parts = normed[offset:].split('\\') + else: + # relative/drive root relative path + root = '\\' if normed[:1] == '\\' else '' + parts = normed.split('\\') + + return root + '\\'.join(_sanitize_path_parts(parts)) def sanitize_url(url, *, scheme='http'): From d710a6ca7c622705c0c8c8a3615916f531137d5d Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 05:14:32 +0200 Subject: [PATCH 11/44] Add extractor helpers (#10653) Authored by: Grub4K --- test/test_traversal.py | 79 ++++++++++++++++++- yt_dlp/extractor/common.py | 8 +- yt_dlp/utils/_utils.py | 27 ++++++- yt_dlp/utils/traversal.py | 158 ++++++++++++++++++++++++++++++++++++- 4 files changed, 261 insertions(+), 11 deletions(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index 5d9fbe1d16..9179dadda4 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -4,8 +4,18 @@ import pytest -from yt_dlp.utils import dict_get, int_or_none, str_or_none -from yt_dlp.utils.traversal import traverse_obj +from yt_dlp.utils import ( + ExtractorError, + 
determine_ext, + dict_get, + int_or_none, + str_or_none, +) +from yt_dlp.utils.traversal import ( + traverse_obj, + require, + subs_list_to_dict, +) _TEST_DATA = { 100: 100, @@ -420,6 +430,71 @@ def test_traversal_morsel(self): assert traverse_obj(morsel, [(None,), any]) == morsel, \ 'Morsel should not be implicitly changed to dict on usage' + def test_traversal_filter(self): + data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]] + + assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \ + '`filter` should filter falsy values' + + +class TestTraversalHelpers: + def test_traversal_require(self): + with pytest.raises(ExtractorError): + traverse_obj(_TEST_DATA, ['None', {require('value')}]) + assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \ + '`require` should pass through non `None` values' + + def test_subs_list_to_dict(self): + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.vtt'}, + {'name': 'en', 'url': 'https://example.com/subs/en1.ass'}, + {'name': 'en', 'url': 'https://example.com/subs/en2.ass'}, + ], [..., { + 'id': 'name', + 'url': 'url', + }, all, {subs_list_to_dict}]) == { + 'de': [{'url': 'https://example.com/subs/de.vtt'}], + 'en': [ + {'url': 'https://example.com/subs/en1.ass'}, + {'url': 'https://example.com/subs/en2.ass'}, + ], + }, 'function should build subtitle dict from list of subtitles' + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.ass'}, + {'name': 'de'}, + {'name': 'en', 'content': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], [..., { + 'id': 'name', + 'data': 'content', + 'url': 'url', + }, all, {subs_list_to_dict}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass'}], + 'en': [{'data': 'content'}], + }, 'subs with mandatory items missing should be filtered' + assert traverse_obj([ + {'url': 'https://example.com/subs/de.ass', 'name': 'de'}, + {'url': 'https://example.com/subs/en', 'name': 'en'}, 
+ ], [..., { + 'id': 'name', + 'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}], + 'url': 'url', + }, all, {subs_list_to_dict(ext='ext')}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], + 'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}], + }, '`ext` should set default ext but leave existing value untouched' + assert traverse_obj([ + {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True}, + {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False}, + ], [..., { + 'id': 'name', + 'quality': ['prio', {int}], + 'url': 'url', + }, all, {subs_list_to_dict(ext='ext')}]) == {'en': [ + {'url': 'https://example.com/subs/en1', 'ext': 'ext'}, + {'url': 'https://example.com/subs/en2', 'ext': 'ext'}, + ]}, '`quality` key should sort subtitle list accordingly' + class TestDictGet: def test_dict_get(self): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3430036f4b..812fbfa9f9 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -573,13 +573,13 @@ class InfoExtractor: def _login_hint(self, method=NO_DEFAULT, netrc=None): password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials' + cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies' return { None: '', - 'any': f'Use --cookies, --cookies-from-browser, {password_hint}', + 'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}', 'password': f'Use {password_hint}', - 'cookies': ( - 'Use --cookies-from-browser or --cookies for the authentication. ' - 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'), + 'cookies': f'Use --cookies-from-browser or --cookies for the authentication. 
{cookies_hint}', + 'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}', }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies'] def __init__(self, downloader=None): diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 967f01fdf9..dd12466b89 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1984,11 +1984,30 @@ def urljoin(base, path): return urllib.parse.urljoin(base, path) -def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): +def partial_application(func): + sig = inspect.signature(func) + + @functools.wraps(func) + def wrapped(*args, **kwargs): + try: + sig.bind(*args, **kwargs) + except TypeError: + return functools.partial(func, *args, **kwargs) + else: + return func(*args, **kwargs) + + return wrapped + + +@partial_application +def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None): if get_attr and v is not None: v = getattr(v, get_attr, None) + if invscale == 1 and scale < 1: + invscale = int(1 / scale) + scale = 1 try: - return int(v) * invscale // scale + return (int(v) if base is None else int(v, base=base)) * invscale // scale except (ValueError, TypeError, OverflowError): return default @@ -2006,9 +2025,13 @@ def str_to_int(int_str): return int_or_none(int_str) +@partial_application def float_or_none(v, scale=1, invscale=1, default=None): if v is None: return default + if invscale == 1 and scale < 1: + invscale = int(1 / scale) + scale = 1 try: return float(v) * invscale / scale except (ValueError, TypeError): diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py index 96eb2eddf5..b918487f98 100644 --- a/yt_dlp/utils/traversal.py +++ b/yt_dlp/utils/traversal.py @@ -1,18 +1,35 @@ +from __future__ import annotations + +import collections import collections.abc import contextlib +import functools import http.cookies import inspect import itertools import re +import typing 
import xml.etree.ElementTree from ._utils import ( IDENTITY, NO_DEFAULT, + ExtractorError, LazyList, deprecation_warning, + get_elements_html_by_class, + get_elements_html_by_attribute, + get_elements_by_attribute, + get_element_html_by_attribute, + get_element_by_attribute, + get_element_html_by_id, + get_element_by_id, + get_element_html_by_class, + get_elements_by_class, + get_element_text_and_html_by_tag, is_iterable_like, try_call, + url_or_none, variadic, ) @@ -54,6 +71,7 @@ def traverse_obj( Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. - `any`-builtin: Take the first matching object and return it, resetting branching. - `all`-builtin: Take all matching objects and return them as a list, resetting branching. + - `filter`-builtin: Return the value if it is truthy, `None` otherwise. `tuple`, `list`, and `dict` all support nested paths and branches. @@ -247,6 +265,10 @@ def apply_path(start_obj, path, test_type): objs = (list(filtered_objs),) continue + if key is filter: + objs = filter(None, objs) + continue + if __debug__ and callable(key): # Verify function signature inspect.signature(key).bind(None, None) @@ -277,13 +299,143 @@ def _traverse_obj(obj, path, allow_empty, test_type): return results[0] if results else {} if allow_empty and is_dict else None for index, path in enumerate(paths, 1): - result = _traverse_obj(obj, path, index == len(paths), True) - if result is not None: - return result + is_last = index == len(paths) + try: + result = _traverse_obj(obj, path, is_last, True) + if result is not None: + return result + except _RequiredError as e: + if is_last: + # Reraise to get cleaner stack trace + raise ExtractorError(e.orig_msg, expected=e.expected) from None return None if default is NO_DEFAULT else default +def value(value, /): + return lambda _: value + + +def require(name, /, *, expected=False): + def func(value): + if value is None: + raise _RequiredError(f'Unable to extract {name}', expected=expected) + + return 
value + + return func + + +class _RequiredError(ExtractorError): + pass + + +@typing.overload +def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ... + + +@typing.overload +def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ... + + +def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): + """ + Convert subtitles from a traversal into a subtitle dict. + The path should have an `all` immediately before this function. + + Arguments: + `ext` The default value for `ext` in the subtitle dict + + In the dict you can set the following additional items: + `id` The subtitle id to sort the dict into + `quality` The sort order for each subtitle + """ + if subs is None: + return functools.partial(subs_list_to_dict, ext=ext) + + result = collections.defaultdict(list) + + for sub in subs: + if not url_or_none(sub.get('url')) and not sub.get('data'): + continue + sub_id = sub.pop('id', None) + if sub_id is None: + continue + if ext is not None and not sub.get('ext'): + sub['ext'] = ext + result[sub_id].append(sub) + result = dict(result) + + for subs in result.values(): + subs.sort(key=lambda x: x.pop('quality', 0) or 0) + + return result + + +@typing.overload +def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ... + + +@typing.overload +def find_element(*, cls: str, html=False): ... + + +@typing.overload +def find_element(*, id: str, tag: str | None = None, html=False): ... + + +@typing.overload +def find_element(*, tag: str, html=False): ... 
+ + +def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): + # deliberately using `id=` and `cls=` for ease of readability + assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' + if not tag: + tag = r'[\w:.-]+' + + if attr and value: + assert not cls, 'Cannot match both attr and cls' + assert not id, 'Cannot match both attr and id' + func = get_element_html_by_attribute if html else get_element_by_attribute + return functools.partial(func, attr, value, tag=tag) + + elif cls: + assert not id, 'Cannot match both cls and id' + assert tag is None, 'Cannot match both cls and tag' + func = get_element_html_by_class if html else get_elements_by_class + return functools.partial(func, cls) + + elif id: + func = get_element_html_by_id if html else get_element_by_id + return functools.partial(func, id, tag=tag) + + index = int(bool(html)) + return lambda html: get_element_text_and_html_by_tag(tag, html)[index] + + +@typing.overload +def find_elements(*, cls: str, html=False): ... + + +@typing.overload +def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ... 
+ + +def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False): + # deliberately using `cls=` for ease of readability + assert cls or (attr and value), 'One of cls or (attr AND value) is required' + + if attr and value: + assert not cls, 'Cannot match both attr and cls' + func = get_elements_html_by_attribute if html else get_elements_by_attribute + return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+') + + assert not tag, 'Cannot match both cls and tag' + func = get_elements_html_by_class if html else get_elements_by_class + return functools.partial(func, cls) + + def get_first(obj, *paths, **kwargs): return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False) From cba7868502f04175fecf9ab3e363296aee7ebec2 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 13 Oct 2024 14:27:01 +0800 Subject: [PATCH 12/44] [ie/reddit] Detect and raise when login is required (#11202) Closes #10924 Authored by: pzhlkj6612 --- yt_dlp/extractor/reddit.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index bc3e5f7eee..b633dc48af 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,3 +1,4 @@ +import json import urllib.parse from .common import InfoExtractor @@ -17,7 +18,7 @@ class RedditIE(InfoExtractor): _NETRC_MACHINE = 'reddit' - _VALID_URL = r'https?://(?P(?:\w+\.)?reddit(?:media)?\.com)/(?P(?:(?:r|user)/[^/]+/)?comments/(?P[^/?#&]+))' + _VALID_URL = r'https?://(?:\w+\.)?reddit(?:media)?\.com/(?P(?:(?:r|user)/[^/]+/)?comments/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -251,15 +252,15 @@ def _get_subtitles(self, video_id): return {'en': [{'url': caption_url}]} def _real_extract(self, url): - host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') + slug, 
video_id = self._match_valid_url(url).group('slug', 'id') - data = self._download_json( - f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403) - if not data: - fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com' - self.to_screen(f'{host} request failed, retrying with {fallback_host}') + try: data = self._download_json( - f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403) + f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403) + except ExtractorError as e: + if isinstance(e.cause, json.JSONDecodeError): + self.raise_login_required('Account authentication is required') + raise if traverse_obj(data, 'error') == 403: reason = data.get('reason') From dcfeea4dd5e5686821350baa6c7767a011944867 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 14 Oct 2024 22:19:26 +0000 Subject: [PATCH 13/44] [ie/adobepass] Use newer user-agent for provider redirect request (#11250) Closes #10848 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index eb7e597e52..7cc15ec7b6 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1355,6 +1355,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0' _MVPD_CACHE = 'ap-mvpd' _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' @@ -1454,7 +1455,11 @@ def extract_redirect_url(html, url=None, fatal=False): 'no_iframe': 'false', 'domain_name': 'adobe.com', 'redirect_url': url, - }) + }, headers={ + # yt-dlp's default user-agent is usually 
too old for Comcast_SSO + # See: https://github.com/yt-dlp/yt-dlp/issues/10848 + 'User-Agent': self._MODERN_USER_AGENT, + } if mso_id == 'Comcast_SSO' else None) elif not self._cookies_passed: raise_mvpd_required() From 64d84d75ca8c19ec06558cc7c511f5f4f7a822bc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 15 Oct 2024 07:07:42 +0000 Subject: [PATCH 14/44] [build] Use `macos-13` image for macOS builds (#11236) Authored by: bashonly --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bd2e42d9af..495d3c6306 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -240,7 +240,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-12 + runs-on: macos-13 steps: - uses: actions/checkout@v4 @@ -346,7 +346,7 @@ jobs: macos_legacy: needs: process if: inputs.macos_legacy - runs-on: macos-12 + runs-on: macos-13 steps: - uses: actions/checkout@v4 From fbc66e3ab35743cc847a21223c67d88bb463cd9c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 16 Oct 2024 03:53:53 +0000 Subject: [PATCH 15/44] [utils] `Popen`: Reset PyInstaller environment (#11258) - Forces spawning independent subprocesses for exes bundled with PyInstaller>=6.10 - Fixes regression introduced in fb8b7f226d251e521a89b23c415e249e5b788e5c - Ref: https://pyinstaller.org/en/v6.10.0/CHANGES.html#incompatible-changes Closes #11259 Authored by: bashonly, Grub4K Co-authored-by: Simon Sawicki --- yt_dlp/utils/_utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index dd12466b89..27ebfefbcb 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -824,14 +824,18 @@ class Popen(subprocess.Popen): _startupinfo = None @staticmethod - def 
_fix_pyinstaller_ld_path(env): - """Restore LD_LIBRARY_PATH when using PyInstaller - Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations - https://github.com/yt-dlp/yt-dlp/issues/4573 - """ + def _fix_pyinstaller_issues(env): if not hasattr(sys, '_MEIPASS'): return + # Force spawning independent subprocesses for exes bundled with PyInstaller>=6.10 + # Ref: https://pyinstaller.org/en/v6.10.0/CHANGES.html#incompatible-changes + # https://github.com/yt-dlp/yt-dlp/issues/11259 + env['PYINSTALLER_RESET_ENVIRONMENT'] = '1' + + # Restore LD_LIBRARY_PATH when using PyInstaller + # Ref: https://pyinstaller.org/en/v6.10.0/runtime-information.html#ld-library-path-libpath-considerations + # https://github.com/yt-dlp/yt-dlp/issues/4573 def _fix(key): orig = env.get(f'{key}_ORIG') if orig is None: @@ -845,7 +849,7 @@ def _fix(key): def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs): if env is None: env = os.environ.copy() - self._fix_pyinstaller_ld_path(env) + self._fix_pyinstaller_issues(env) self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines') if text is True: From 7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 19 Oct 2024 21:40:20 +0000 Subject: [PATCH 16/44] [ie/youtube] Fix `comment_count` extraction (#11274) Authored by: bashonly --- yt_dlp/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6acc42fc0a..f41f57ed16 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4701,11 +4701,12 @@ def process_language(container, base_url, lang_code, sub_name, query): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') + COMMENTS_SECTION_IDS = 
('comment-item-section', 'engagement-panel-comments-section') info['comment_count'] = traverse_obj(initial_data, ( 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', ), ( - 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', + 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS, 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', ), expected_type=self._get_count, get_all=False) From 3148c1822f66533998278f0a1cf842b9bea1526a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 19 Oct 2024 21:41:14 +0000 Subject: [PATCH 17/44] [ie/substack] Resolve podcast file extensions (#11275) Closes #4601 Authored by: bashonly --- yt_dlp/extractor/substack.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 30cb322dc2..b70d40f2ca 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -2,7 +2,13 @@ import urllib.parse from .common import InfoExtractor -from ..utils import js_to_json, str_or_none, traverse_obj +from ..networking import HEADRequest +from ..utils import ( + determine_ext, + js_to_json, + str_or_none, +) +from ..utils.traversal import traverse_obj class SubstackIE(InfoExtractor): @@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor): 'uploader': "Andrew Zimmern's Spilled Milk ", 'uploader_id': '577659', }, + }, { + # Podcast that needs its file extension resolved to mp3 + 'url': 'https://persuasion1.substack.com/p/summers', + 'md5': '1456a755d46084744facdfac9edf900f', + 'info_dict': { + 'id': '141970405', + 'ext': 'mp3', + 'title': 'Larry Summers on What Went Wrong on Campus', + 
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.', + 'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg', + 'uploader': 'Persuasion', + 'uploader_id': '61579', + }, }] @classmethod @@ -89,7 +108,15 @@ def _real_extract(self, url): post_type = webpage_info['post']['type'] formats, subtitles = [], {} if post_type == 'podcast': - formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} + fmt = {'url': webpage_info['post']['podcast_url']} + if not determine_ext(fmt['url'], default_ext=None): + # The redirected format URL expires but the original URL doesn't, + # so we only want to extract the extension from this request + fmt['ext'] = determine_ext(self._request_webpage( + HEADRequest(fmt['url']), display_id, + 'Resolving podcast file extension', + 'Podcast URL is invalid').url) + formats.append(fmt) elif post_type == 'video': formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) else: From 679c68240a26481ea7c07cc0c014745631ea8481 Mon Sep 17 00:00:00 2001 From: rubyevadestaxes <147743127+rubyevadestaxes@users.noreply.github.com> Date: Sat, 19 Oct 2024 23:51:47 +0200 Subject: [PATCH 18/44] [ie/twitter:spaces] Allow extraction when not logged in (#11289) Closes #11288 Authored by: rubyevadestaxes --- yt_dlp/extractor/twitter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index aca94df2dd..5adaf16393 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -934,14 +934,13 @@ class TwitterIE(TwitterBaseIE): 'uploader_id': 'MoniqueCamarra', 'live_status': 'was_live', 'release_timestamp': 1658417414, - 'description': 'md5:acce559345fd49f129c20dbcda3f1201', + 'description': r're:Twitter Space participated by Sergej Sumlenny.+', 'timestamp': 1658407771, 'release_date': '20220721', 'upload_date': '20220721', }, 'add_ie': ['TwitterSpaces'], 
'params': {'skip_download': 'm3u8'}, - 'skip': 'Requires authentication', }, { # URL specifies video number but --yes-playlist 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1', @@ -1856,8 +1855,6 @@ def _build_graphql_query(self, space_id): def _real_extract(self, url): space_id = self._match_id(url) - if not self.is_logged_in: - self.raise_login_required('Twitter Spaces require authentication') space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace'] if not space_data: raise ExtractorError('Twitter Space not found', expected=True) From 8de431ec97a4b62b73df8f686b6e21e462775336 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 20 Oct 2024 15:18:15 +0200 Subject: [PATCH 19/44] [ie/Funk] Extend `_VALID_URL` (#11269) Authored by: seproDev --- yt_dlp/extractor/funk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 8bdea3fce7..ef8ea72a8c 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -3,7 +3,7 @@ class FunkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' + _VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'md5': '8610449476156f338761a75391b0017d', @@ -27,6 +27,9 @@ class FunkIE(InfoExtractor): }, { 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, + }, { + 'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391', + 'only_matching': True, }] def _real_extract(self, url): From 0f593dca9fa995d88eb763170a932da61c8f24dc Mon Sep 17 00:00:00 2001 From: Imran Hussain Date: Sun, 20 Oct 2024 18:10:26 +0100 Subject: [PATCH 20/44] Add option 
`--plugin-dirs` (#11277) Closes #3260 Authored by: imranh2, coletdjnz Co-authored-by: coletdjnz --- README.md | 7 +++++++ test/test_plugins.py | 19 +++++++++++++++++++ .../yt_dlp_plugins/extractor/package.py | 5 +++++ yt_dlp/__init__.py | 5 +++++ yt_dlp/options.py | 8 ++++++++ yt_dlp/plugins.py | 7 +++++++ yt_dlp/utils/_utils.py | 4 ++++ 7 files changed, 55 insertions(+) create mode 100644 test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py diff --git a/README.md b/README.md index 1cafe51d51..fc38a529a7 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,13 @@ ## General Options: containing directory ("-" for stdin). Can be used multiple times and inside other configuration files + --plugin-dirs PATH Path to an additional directory to search + for plugins. This option can be used + multiple times to add multiple directories. + Note that this currently only works for + extractor plugins; postprocessor plugins can + only be loaded from the default plugin + directories --flat-playlist Do not extract the videos of a playlist, only list them --no-flat-playlist Fully extract the videos of a playlist diff --git a/test/test_plugins.py b/test/test_plugins.py index c82158e9fc..77545d136c 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -10,6 +10,7 @@ sys.path.append(str(TEST_DATA_DIR)) importlib.invalidate_caches() +from yt_dlp.utils import Config from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins @@ -68,6 +69,24 @@ def test_importing_zipped_module(self): os.remove(zip_path) importlib.invalidate_caches() # reset the import caches + def test_plugin_dirs(self): + # Internal plugin dirs hack for CLI --plugin-dirs + # To be replaced with proper system later + custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages' + Config._plugin_dirs = [str(custom_plugin_dir)] + importlib.invalidate_caches() # reset the import caches + + try: + package = importlib.import_module(f'{PACKAGE_NAME}.extractor') + 
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('PackagePluginIE', plugins_ie.keys()) + + finally: + Config._plugin_dirs = [] + importlib.invalidate_caches() # reset the import caches + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py new file mode 100644 index 0000000000..b860300d8d --- /dev/null +++ b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class PackagePluginIE(InfoExtractor): + pass diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f598b6c2fe..d976f5bbcb 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -34,6 +34,7 @@ ) from .update import Updater from .utils import ( + Config, NO_DEFAULT, POSTPROCESS_WHEN, DateRange, @@ -967,6 +968,10 @@ def _real_main(argv=None): parser, opts, all_urls, ydl_opts = parse_options(argv) + # HACK: Set the plugin dirs early on + # TODO(coletdjnz): remove when plugin globals system is implemented + Config._plugin_dirs = opts.plugin_dirs + # Dump user agent if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9980b7fc3f..c3a647da77 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -408,6 +408,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help=( 'Location of the main configuration file; either the path to the config or its containing directory ' '("-" for stdin). 
Can be used multiple times and inside other configuration files')) + general.add_option( + '--plugin-dirs', + dest='plugin_dirs', metavar='PATH', action='append', + help=( + 'Path to an additional directory to search for plugins. ' + 'This option can be used multiple times to add multiple directories. ' + 'Note that this currently only works for extractor plugins; ' + 'postprocessor plugins can only be loaded from the default plugin directories')) general.add_option( '--flat-playlist', action='store_const', dest='extract_flat', const='in_playlist', default=False, diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index d777d14e71..204558d603 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -15,6 +15,7 @@ from .compat import functools # isort: split from .utils import ( + Config, get_executable_path, get_system_config_dirs, get_user_config_dirs, @@ -84,6 +85,12 @@ def _get_package_paths(*root_paths, containing_folder='plugins'): with contextlib.suppress(ValueError): # Added when running __main__.py directly candidate_locations.remove(Path(__file__).parent) + # TODO(coletdjnz): remove when plugin globals system is implemented + if Config._plugin_dirs: + candidate_locations.extend(_get_package_paths( + *Config._plugin_dirs, + containing_folder='')) + parts = Path(*fullname.split('.')) for path in orderedSet(candidate_locations, lazy=True): candidate = path / parts diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 27ebfefbcb..ea748898f2 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -4897,6 +4897,10 @@ class Config: filename = None __initialized = False + # Internal only, do not use! 
Hack to enable --plugin-dirs + # TODO(coletdjnz): remove when plugin globals system is implemented + _plugin_dirs = None + def __init__(self, parser, label=None): self.parser, self.label = parser, label self._loaded_paths, self.configs = set(), [] From 5af774d7a36c00bea618c7047c9326532cd3f616 Mon Sep 17 00:00:00 2001 From: Deer-Spangle <60626596+Deer-Spangle@users.noreply.github.com> Date: Sun, 20 Oct 2024 21:58:53 +0100 Subject: [PATCH 21/44] [ie/imgur] Support new URL format (#11075) Authored by: Deer-Spangle --- yt_dlp/extractor/imgur.py | 68 ++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index f0c3419d49..2a5a1c9e84 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -37,7 +37,7 @@ def get_description(s): class ImgurIE(ImgurBaseIE): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://imgur.com/A61SaA1', @@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE): 'like_count': int, 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', }, + }, { + # Test with URL slug + 'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'timestamp': 1416446068, + 'upload_date': '20141120', + 'dislike_count': int, + 'comment_count': int, + 'release_timestamp': 1416446068, + 'release_date': '20141120', + 'like_count': int, + 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', + }, }, { 'url': 'https://i.imgur.com/A61SaA1.gifv', 'only_matching': True, @@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE): 'comment_count': int, 'release_timestamp': 1710491255, 'release_date': '20240315', + 'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg', }, }] @@ -252,17 +269,9 @@ def 
_real_extract(self, url): class ImgurGalleryIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:gallery' - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'http://imgur.com/gallery/Q95ko', - 'info_dict': { - 'id': 'Q95ko', - 'title': 'Adding faces make every GIF better', - }, - 'playlist_count': 25, - 'skip': 'Zoinks! You\'ve taken a wrong turn.', - }, { # TODO: static images - replace with animated/video gallery 'url': 'http://imgur.com/topic/Aww/ll5Vk', 'only_matching': True, @@ -280,7 +289,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'release_timestamp': 1358554297, 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', 'release_date': '20130119', - 'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + }, + }, { + # Test with slug + 'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx', + 'add_ies': ['Imgur'], + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'timestamp': 1358554297, + 'upload_date': '20130119', + 'uploader_id': '1648642', + 'uploader': 'wittyusernamehere', + 'release_timestamp': 1358554297, + 'release_date': '20130119', + 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', 'comment_count': int, 'dislike_count': int, 'like_count': int, @@ -317,6 +346,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'title': 'Penguins !', }, 'playlist_count': 3, + }, { + 'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ', + 'info_dict': { + 'id': '6lAn9VQ', + 'title': 'Penguins 
!', + }, + 'playlist_count': 3, }, { 'url': 'https://imgur.com/t/unmuted/kx2uD3C', 'add_ies': ['Imgur'], @@ -357,7 +393,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): class ImgurAlbumIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:album' - _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _GALLERY = False _TESTS = [{ # TODO: only static images - replace with animated/video gallery @@ -372,6 +408,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE): 'title': 'enen-no-shouboutai', }, 'playlist_count': 2, + }, { + # Test with URL slug + 'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX', + 'info_dict': { + 'id': 'iX265HX', + 'title': 'enen-no-shouboutai', + }, + 'playlist_count': 2, }, { 'url': 'https://imgur.com/a/8pih2Ed', 'info_dict': { From c4d95f67ddc522297bb1fea875255cf94b34d595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Kotiuk?= Date: Sun, 20 Oct 2024 23:16:22 +0200 Subject: [PATCH 22/44] [ie/cda] Support folders (#10786) Closes #5429 Authored by: pktiuk --- yt_dlp/extractor/_extractors.py | 5 +++- yt_dlp/extractor/cda.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4b1f4c316d..8d59360949 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -363,7 +363,10 @@ ) from .ccma import CCMAIE from .cctv import CCTVIE -from .cda import CDAIE +from .cda import ( + CDAIE, + CDAFolderIE, +) from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 62ee8b17f1..b2738e492f 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -12,6 +12,7 @@ from ..compat import compat_ord from ..utils import ( ExtractorError, + OnDemandPagedList, float_or_none, int_or_none, merge_dicts, @@ -351,3 +352,50 @@ def 
extract_format(page, version): extract_format(webpage, resolution) return merge_dicts(info_dict, info) + + +class CDAFolderIE(InfoExtractor): + _MAX_PAGE_SIZE = 36 + _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P\w+)/folder/(?P\d+)' + _TESTS = [ + { + 'url': 'https://www.cda.pl/domino264/folder/31188385', + 'info_dict': { + 'id': '31188385', + 'title': 'SERIA DRUGA', + }, + 'playlist_mincount': 13, + }, + { + 'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm', + 'info_dict': { + 'id': '2664592', + 'title': 'VideoDowcipy - wszystkie odcinki', + }, + 'playlist_mincount': 71, + }, + { + 'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm', + 'info_dict': { + 'id': '19129979', + 'title': 'TESTY KOSMETYKÓW', + }, + 'playlist_mincount': 139, + }] + + def _real_extract(self, url): + folder_id, channel = self._match_valid_url(url).group('id', 'channel') + + webpage = self._download_webpage(url, folder_id) + + def extract_page_entries(page): + webpage = self._download_webpage( + f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id, + f'Downloading page {page + 1}', expected_status=404) + items = re.findall(r']+href="/video/([0-9a-z]+)"', webpage) + for video_id in items: + yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id) + + return self.playlist_result( + OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE), + folder_id, self._og_search_title(webpage)) From 87408ccfd772ddf31a8323d8151c24f9577cbc9f Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 20 Oct 2024 23:18:11 +0200 Subject: [PATCH 23/44] [ie/imgur] Fix thumbnail extraction (#11298) Authored by: seproDev --- yt_dlp/extractor/imgur.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index 2a5a1c9e84..e2644e6a40 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -225,7 +225,10 @@ def og_get_size(media_type): }), get_all=False), 'id': 
video_id, 'formats': formats, - 'thumbnail': url_or_none(search('thumbnailUrl')), + 'thumbnails': [{ + 'url': thumbnail_url, + 'http_headers': {'Accept': '*/*'}, + }] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None, 'http_headers': {'Accept': '*/*'}, } From ec2f4bf0823a13043f98f5bd0bf6677837bf09dc Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 20 Oct 2024 17:25:29 -0500 Subject: [PATCH 24/44] [ie/youtube] Remove broken age-restriction workaround (#11297) Closes #11296 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f41f57ed16..60492fff9f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -114,6 +114,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, + # This client now requires sign-in for every video 'web_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -153,6 +154,7 @@ 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, }, + # This client now requires sign-in for every video 'android_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -201,6 +203,7 @@ 'PLAYER_PARAMS': '2AMB', }, # This client only has legacy formats and storyboards + # BROKEN: Unable to download API page: HTTP Error 403: Forbidden "The caller does not have permission" 'android_producer': { 'INNERTUBE_CONTEXT': { 'client': { @@ -247,6 +250,7 @@ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, }, + # This client now requires sign-in for every video 'ios_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -282,8 +286,9 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, }, - # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) - # See: https://github.com/zerodytrash/YouTube-Internal-Clients + # This client now requires sign-in for every video + # It was previously an age-gate workaround for videos that 
were `playable_in_embed` + # It may still be useful if signed into an EU account that is not age-verified 'tv_embedded': { 'INNERTUBE_CONTEXT': { 'client': { @@ -1525,6 +1530,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'heatmap': 'count:100', 'timestamp': 1401991663, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Age-gate video with embed allowed in public site', @@ -1555,6 +1561,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Age-gate video embedable only with clientScreen=EMBED', @@ -1585,6 +1592,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@ProjektMelody', 'timestamp': 1577508724, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Non-Agegated non-embeddable video', @@ -2356,6 +2364,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_is_verified': True, 'timestamp': 1405513526, }, + 'skip': 'Age-restricted; requires authentication', }, { # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 @@ -2726,6 +2735,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, + 'skip': 'Age-restricted; requires authentication', }, { 'url': 'https://www.youtube.com/live/qVv6vCqciTM', @@ -3953,26 +3963,15 @@ def append_client(*client_names): else: prs.append(pr) - # tv_embedded can work around age-gate and age-verification IF the video is embeddable - if self._is_agegated(pr) and variant != 'tv_embedded': - append_client(f'tv_embedded.{base_client}') - - # Unauthenticated users will only get tv_embedded client formats if age-gated - if self._is_agegated(pr) and not self.is_authenticated: - self.to_screen( - f'{video_id}: This video is age-restricted; some formats may be missing ' - f'without authentication. 
{self._login_hint()}', only_once=True) - # EU countries require age-verification for accounts to access age-restricted videos # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients - # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded - embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr) - if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled): + if self.is_authenticated and self._is_agegated(pr): self.to_screen( f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) # web_creator and mediaconnect can work around the age-verification requirement - # _producer, _testsuite, & _vr variants can also work around age-verification + # _testsuite & _vr variants can also work around age-verification + # tv_embedded may(?) still work around age-verification if the video is embeddable append_client('web_creator', 'mediaconnect') prs.extend(deprioritized_prs) From fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 20 Oct 2024 14:49:00 -0500 Subject: [PATCH 25/44] [ie/youtube] Remove broken `android_producer` client (#11297) Authored by: bashonly --- yt_dlp/extractor/youtube.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 60492fff9f..728cb06966 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -202,22 +202,6 @@ 'REQUIRE_JS_PLAYER': False, 'PLAYER_PARAMS': '2AMB', }, - # This client only has legacy formats and storyboards - # BROKEN: Unable to download API page: HTTP Error 403: Forbidden "The caller does not have permission" - 'android_producer': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'ANDROID_PRODUCER', - 'clientVersion': '0.111.1', - 'androidSdkVersion': 30, - 'userAgent': 
'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip', - 'osName': 'Android', - 'osVersion': '11', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 91, - 'REQUIRE_JS_PLAYER': False, - }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 'ios': { From 40054cb4a7ebbea30d335d444e6f58b298a3baa0 Mon Sep 17 00:00:00 2001 From: David Skrundz Date: Mon, 21 Oct 2024 12:56:43 -0600 Subject: [PATCH 26/44] [ie/gem.cbc.ca] Fix formats extraction (#11196) Also extracts `timestamp` and `release_timestamp` as seconds instead of milliseconds Authored by: DavidSkrundz --- yt_dlp/extractor/cbc.py | 82 +++++++++++++---------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 40224f63f5..b44c23fa10 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -4,7 +4,6 @@ import re import time import urllib.parse -import xml.etree.ElementTree from .common import InfoExtractor from ..networking import HEADRequest @@ -12,7 +11,6 @@ ExtractorError, float_or_none, int_or_none, - join_nonempty, js_to_json, mimetype2ext, orderedSet, @@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor): _TESTS = [{ # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', - 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e', 'info_dict': { 'id': 'schitts-creek/s06e01', 'ext': 'mp4', 'title': 'Smoke Signals', 'description': 'md5:929868d20021c924020641769eb3e7f1', - 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)', - 'duration': 1314, + 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg', + 'duration': 1324, 'categories': ['comedy'], 'series': 'Schitt\'s Creek', 'season': 'Season 6', @@ -539,19 +536,21 @@ 
class CBCGemIE(InfoExtractor): 'episode': 'Smoke Signals', 'episode_number': 1, 'episode_id': 'schitts-creek/s06e01', + 'upload_date': '20210618', + 'timestamp': 1623988800, + 'release_date': '20200107', + 'release_timestamp': 1578427200, }, 'params': {'format': 'bv'}, - 'skip': 'Geo-restricted to Canada', }, { # This video requires an account in the browser, but works fine in yt-dlp 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', - 'md5': '297a9600f554f2258aed01514226a697', 'info_dict': { 'id': 'schitts-creek/s01e01', 'ext': 'mp4', 'title': 'The Cup Runneth Over', 'description': 'md5:9bca14ea49ab808097530eb05a29e797', - 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)', + 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg', 'series': 'Schitt\'s Creek', 'season_number': 1, 'season': 'Season 1', @@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor): 'episode_id': 'schitts-creek/s01e01', 'duration': 1309, 'categories': ['comedy'], + 'upload_date': '20210617', + 'timestamp': 1623902400, + 'release_date': '20151124', + 'release_timestamp': 1448323200, }, 'params': {'format': 'bv'}, - 'skip': 'Geo-restricted to Canada', }, { 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', 'only_matching': True, @@ -631,38 +633,6 @@ def _real_initialize(self): return self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') - def _find_secret_formats(self, formats, video_id): - """ Find a valid video url and convert it to the secret variant """ - base_format = next((f for f in formats if f.get('vcodec') != 'none'), None) - if not base_format: - return - - base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url']) - url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url) - - secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False) - 
if not isinstance(secret_xml, xml.etree.ElementTree.Element): - return - - for child in secret_xml: - if child.attrib.get('Type') != 'video': - continue - for video_quality in child: - bitrate = int_or_none(video_quality.attrib.get('Bitrate')) - if not bitrate or 'Index' not in video_quality.attrib: - continue - height = int_or_none(video_quality.attrib.get('MaxHeight')) - - yield { - **base_format, - 'format_id': join_nonempty('sec', height), - # Note: \g<1> is necessary instead of \1 since bitrate is a number - 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url), - 'width': int_or_none(video_quality.attrib.get('MaxWidth')), - 'tbr': bitrate / 1000.0, - 'height': height, - } - def _real_extract(self, url): video_id = self._match_id(url) video_info = self._download_json( @@ -676,7 +646,6 @@ def _real_extract(self, url): else: headers = {} m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) - m3u8_url = m3u8_info.get('url') if m3u8_info.get('errorCode') == 1: self.raise_geo_restricted(countries=['CA']) @@ -685,9 +654,9 @@ def _real_extract(self, url): elif m3u8_info.get('errorCode') != 0: raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') - formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + formats = self._extract_m3u8_formats( + m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''}) self._remove_duplicate_formats(formats) - formats.extend(self._find_secret_formats(formats, video_id)) for fmt in formats: if fmt.get('vcodec') == 'none': @@ -703,20 +672,21 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': video_info['title'], - 'description': video_info.get('description'), - 'thumbnail': video_info.get('image'), - 'series': video_info.get('series'), - 'season_number': video_info.get('season'), - 'season': f'Season {video_info.get("season")}', - 'episode_number': video_info.get('episode'), 
- 'episode': video_info.get('title'), 'episode_id': video_id, - 'duration': video_info.get('duration'), - 'categories': [video_info.get('category')], 'formats': formats, - 'release_timestamp': video_info.get('airDate'), - 'timestamp': video_info.get('availableDate'), + **traverse_obj(video_info, { + 'title': ('title', {str}), + 'episode': ('title', {str}), + 'description': ('description', {str}), + 'thumbnail': ('image', {url_or_none}), + 'series': ('series', {str}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + 'duration': ('duration', {int_or_none}), + 'categories': ('category', {str}, all), + 'release_timestamp': ('airDate', {int_or_none(scale=1000)}), + 'timestamp': ('availableDate', {int_or_none(scale=1000)}), + }), } From 0b7ec08816fb196cd41d392f8331b4eb8366c4f8 Mon Sep 17 00:00:00 2001 From: DarkZeros Date: Mon, 21 Oct 2024 22:18:12 +0100 Subject: [PATCH 27/44] [ie/telecinco] Fix extractors (#11142) Closes #10986, Closes #11106 Authored by: DarkZeros, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/mitele.py | 26 ++++++- yt_dlp/extractor/telecinco.py | 140 +++++++++++++++++++--------------- 2 files changed, 103 insertions(+), 63 deletions(-) diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index ea29986729..3573a2a3fd 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,14 +1,13 @@ -from .telecinco import TelecincoIE +from .telecinco import TelecincoBaseIE from ..utils import ( int_or_none, parse_iso8601, ) -class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE +class MiTeleIE(TelecincoBaseIE): IE_DESC = 'mitele.es' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P[^/]+)/player' - _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'info_dict': { @@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 
'timestamp': 1471209401, 'upload_date': '20160814', }, + 'skip': 'HTTP Error 404 Not Found', }, { # no explicit title 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', @@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 'params': { 'skip_download': True, }, + 'skip': 'HTTP Error 404 Not Found', + }, { + 'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/', + 'info_dict': { + 'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f', + 'ext': 'mp4', + 'title': 'Horizonte Temporada 5 Programa 171', + 'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3', + 'episode': 'Las Zonas de Bajas Emisiones, a debate', + 'episode_number': 171, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Horizonte', + 'duration': 7012, + 'upload_date': '20240927', + 'timestamp': 1727416450, + 'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg', + 'age_limit': 12, + }, + 'params': {'geo_bypass_country': 'ES'}, }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 7a9dcd71c5..9ef621446d 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -2,15 +2,69 @@ import re from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, clean_html, int_or_none, + join_nonempty, str_or_none, - try_get, + traverse_obj, + update_url, + url_or_none, ) -class TelecincoIE(InfoExtractor): +class TelecincoBaseIE(InfoExtractor): + def _parse_content(self, content, url): + video_id = content['dataMediaId'] + config = self._download_json( + content['dataConfig'], video_id, 'Downloading config JSON') + services = config['services'] + caronte = self._download_json(services['caronte'], video_id) + if traverse_obj(caronte, ('dls', 0, 'drm', 
{bool})): + self.report_drm(video_id) + + stream = caronte['dls'][0]['stream'] + headers = { + 'Referer': url, + 'Origin': re.match(r'https?://[^/]+', url).group(0), + } + geo_headers = {**headers, **self.geo_verification_headers()} + + try: + cdn = self._download_json( + caronte['cerbero'], video_id, data=json.dumps({ + 'bbx': caronte['bbx'], + 'gbx': self._download_json(services['gbx'], video_id)['gbx'], + }).encode(), headers={ + 'Content-Type': 'application/json', + **geo_headers, + })['tokens']['1']['cdn'] + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 403: + error_code = traverse_obj( + self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False), + ({json.loads}, 'code', {int})) + if error_code == 4038: + self.raise_geo_restricted(countries=['ES']) + raise + + formats = self._extract_m3u8_formats( + update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers) + + return { + 'id': video_id, + 'title': traverse_obj(config, ('info', 'title', {str})), + 'formats': formats, + 'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none})) + or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)), + 'duration': traverse_obj(content, ('dataDuration', {int_or_none})), + 'http_headers': headers, + } + + +class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P.+?)\.html' @@ -30,6 +84,7 @@ class TelecincoIE(InfoExtractor): 'duration': 662, }, }], + 'skip': 'HTTP Error 410 Gone', }, { 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', @@ -40,23 +95,24 @@ class TelecincoIE(InfoExtractor): 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 'duration': 79, }, + 'skip': 'Redirects to main page', }, { 'url': 
'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', - 'md5': 'eddb50291df704ce23c74821b995bcac', + 'md5': '5ce057f43f30b634fbaf0f18c71a140a', 'info_dict': { 'id': 'aywerkD2Sv1vGNqq9b85Q2', 'ext': 'mp4', 'title': '#DOYLACARA. Con la trata no hay trato', - 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 'duration': 50, + 'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg', }, }, { # video in opening's content 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', 'info_dict': { - 'id': '2907195140', + 'id': '1691427', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', - 'description': 'md5:73f340a7320143d37ab895375b2bf13a', + 'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}', }, 'playlist': [{ 'md5': 'adb28c37238b675dad0f042292f209a7', @@ -65,6 +121,7 @@ class TelecincoIE(InfoExtractor): 'ext': 'mp4', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'duration': 1015, + 'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg', }, }], 'params': { @@ -81,66 +138,29 @@ class TelecincoIE(InfoExtractor): 'only_matching': True, }] - def _parse_content(self, content, url): - video_id = content['dataMediaId'] - config = self._download_json( - content['dataConfig'], video_id, 'Downloading config JSON') - title = config['info']['title'] - services = config['services'] - caronte = self._download_json(services['caronte'], video_id) - stream = caronte['dls'][0]['stream'] - headers = self.geo_verification_headers() - headers.update({ - 'Content-Type': 'application/json;charset=UTF-8', - 'Origin': re.match(r'https?://[^/]+', url).group(0), - }) - cdn = self._download_json( - caronte['cerbero'], video_id, data=json.dumps({ - 'bbx': 
caronte['bbx'], - 'gbx': self._download_json(services['gbx'], video_id)['gbx'], - }).encode(), headers=headers)['tokens']['1']['cdn'] - formats = self._extract_m3u8_formats( - stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'), - 'duration': int_or_none(content.get('dataDuration')), - } - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - article = self._parse_json(self._search_regex( - r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', - webpage, 'article'), display_id)['article'] - title = article.get('title') - description = clean_html(article.get('leadParagraph')) or '' + article = self._search_json( + r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', + webpage, 'article', display_id)['article'] + description = traverse_obj(article, ('leadParagraph', {clean_html}, filter)) + if article.get('editorialType') != 'VID': entries = [] - body = [article.get('opening')] - body.extend(try_get(article, lambda x: x['body'], list) or []) - for p in body: - if not isinstance(p, dict): - continue - content = p.get('content') - if not content: - continue + + for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])): + content = p['content'] type_ = p.get('type') - if type_ == 'paragraph': - content_str = str_or_none(content) - if content_str: - description += content_str - continue - if type_ == 'video' and isinstance(content, dict): + if type_ == 'paragraph' and isinstance(content, str): + description = join_nonempty(description, content, delim='') + elif type_ == 'video' and isinstance(content, dict): entries.append(self._parse_content(content, url)) + return self.playlist_result( - entries, str_or_none(article.get('id')), title, description) - content = article['opening']['content'] - 
info = self._parse_content(content, url) - info.update({ - 'description': description, - }) + entries, str_or_none(article.get('id')), + traverse_obj(article, ('title', {str})), clean_html(description)) + + info = self._parse_content(article['opening']['content'], url) + info['description'] = description return info From 46fe60ff19395698a87113b2944453779e04ab9d Mon Sep 17 00:00:00 2001 From: 63427083dev <77916527+63427083@users.noreply.github.com> Date: Tue, 22 Oct 2024 01:42:45 +0200 Subject: [PATCH 28/44] [ie/afreecatv] Adapt extractors to new sooplive.co.kr domain (#11266) Closes #11253 Authored by: 63427083, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/afreecatv.py | 185 +++++++++++++--------------------- 1 file changed, 68 insertions(+), 117 deletions(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 815d20537f..83e510d1a2 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -33,21 +33,21 @@ def _perform_login(self, username, password): } response = self._download_json( - 'https://login.afreecatv.com/app/LoginAction.php', None, + 'https://login.sooplive.co.kr/app/LoginAction.php', None, 'Logging in', data=urlencode_postdata(login_form)) _ERRORS = { -4: 'Your account has been suspended due to a violation of our terms and policies.', - -5: 'https://member.afreecatv.com/app/user_delete_progress.php', - -6: 'https://login.afreecatv.com/membership/changeMember.php', - -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. 
\nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", - -9: 'https://member.afreecatv.com/app/pop_login_block.php', - -11: 'https://login.afreecatv.com/afreeca/second_login.php', - -12: 'https://member.afreecatv.com/app/user_security.php', + -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', + -6: 'https://login.sooplive.co.kr/membership/changeMember.php', + -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", + -9: 'https://member.sooplive.co.kr/app/pop_login_block.php', + -11: 'https://login.sooplive.co.kr/afreeca/second_login.php', + -12: 'https://member.sooplive.co.kr/app/user_security.php', 0: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.', -3: 'You have entered your username/password incorrectly.', - -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', + -7: 'You cannot use your Global Soop account to access Korean Soop.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -32008: 'You have failed to log in. 
Please contact our Help Center.', } @@ -61,76 +61,40 @@ def _perform_login(self, username, password): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): return self._download_json(Request( - f'https://api.m.afreecatv.com/{endpoint}', + f'https://api.m.sooplive.co.kr/{endpoint}', data=data, headers=headers, query=query, extensions={'legacy_ssl': True}), display_id, 'Downloading API JSON', 'Unable to download API JSON') class AfreecaTVIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv' - IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? - (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html) - )\?.*?\bnTitleNo=| - vod\.afreecatv\.com/(PLAYER/STATION|player)/ - ) - (?P\d+)/?(?:$|[?#&]) - ''' + IE_NAME = 'soop' + IE_DESC = 'sooplive.co.kr' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P\d+)/?(?:$|[?#&])' _TESTS = [{ - 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', - 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', + 'url': 'https://vod.sooplive.co.kr/player/96753363', 'info_dict': { - 'id': '36164052', + 'id': '20230108_9FF5BEE1_244432674_1', 'ext': 'mp4', - 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', - 'upload_date': '20160503', + 'uploader_id': 'rlantnghks', + 'uploader': '페이즈으', + 'duration': 10840, + 'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+', + 'upload_date': '20230108', + 'timestamp': 1673218805, + 'title': '젠지 페이즈', }, - 'skip': 'Video is gone', - }, { - 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', - 'info_dict': { - 'id': '36153164', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 
're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', + 'params': { + 'skip_download': True, }, - 'playlist_count': 2, - 'playlist': [{ - 'md5': 'd8b7c174568da61d774ef0203159bf97', - 'info_dict': { - 'id': '36153164_1', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }, { - 'md5': '58f2ce7f6044e34439ab2d50612ab02b', - 'info_dict': { - 'id': '36153164_2', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }], - 'skip': 'Video is gone', }, { # non standard key - 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', + 'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', 'info_dict': { 'id': '20170411_BE689A0E_190960999_1_2_h', 'ext': 'mp4', 'title': '혼자사는여자집', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '♥이슬이', 'uploader_id': 'dasl8121', 'upload_date': '20170411', @@ -142,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE): }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/97267690', + 'url': 'https://vod.sooplive.co.kr/player/97267690', 'info_dict': { 'id': '20180327_27901457_202289533_1', 'ext': 'mp4', 'title': '[생]빨개요♥ (part 1)', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '[SA]서아', 'uploader_id': 'bjdyrksu', 'upload_date': '20180327', @@ -157,36 +121,17 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'skip_download': True, }, 'skip': 'The VOD does not exist', - }, { - 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', - 'only_matching': True, - }, { - 'url': 'https://vod.afreecatv.com/player/96753363', - 'info_dict': { - 'id': '20230108_9FF5BEE1_244432674_1', - 'ext': 'mp4', - 'uploader_id': 'rlantnghks', - 'uploader': '페이즈으', - 
'duration': 10840, - 'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+', - 'upload_date': '20230108', - 'timestamp': 1673218805, - 'title': '젠지 페이즈', - }, - 'params': { - 'skip_download': True, - }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/70395877', + 'url': 'https://vod.sooplive.co.kr/player/70395877', 'only_matching': True, }, { # subscribers only - 'url': 'https://vod.afreecatv.com/player/104647403', + 'url': 'https://vod.sooplive.co.kr/player/104647403', 'only_matching': True, }, { # private - 'url': 'https://vod.afreecatv.com/player/81669846', + 'url': 'https://vod.sooplive.co.kr/player/81669846', 'only_matching': True, }] @@ -262,11 +207,11 @@ def _real_extract(self, url): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:catchstory' - IE_DESC = 'afreecatv.com catch story' - _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P\d+)/catchstory' + IE_NAME = 'soop:catchstory' + IE_DESC = 'sooplive.co.kr catch story' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P\d+)/catchstory' _TESTS = [{ - 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', 'info_dict': { 'id': '103247', }, @@ -299,11 +244,11 @@ def _entries(data): class AfreecaTVLiveIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:live' - IE_DESC = 'afreecatv.com livestreams' - _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P[^/]+)(?:/(?P\d+))?' + IE_NAME = 'soop:live' + IE_DESC = 'sooplive.co.kr livestreams' + _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)(?:/(?P\d+))?' 
_TESTS = [{ - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'info_dict': { 'id': '237852185', 'ext': 'mp4', @@ -315,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): }, 'skip': 'Livestream has ended', }, { - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'only_matching': True, }, { - 'url': 'https://play.afreecatv.com/pyh3646', + 'url': 'https://play.sooplive.co.kr/pyh3646', 'only_matching': True, }] - _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' + _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' _WORKING_CDNS = [ - 'gcp_cdn', # live-global-cdn-v02.afreecatv.com - 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com - 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com - 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com + 'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr + 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr + 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr + 'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr ] _BAD_CDNS = [ 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) - 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400) - 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve) - 'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve) - 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400) + 'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400) + 'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve) + 'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve) + 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) ] def _extract_formats(self, channel_info, broadcast_no, aid): - stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' + stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' # If user has not passed CDN 
IDs, try API-provided CDN ID followed by other working CDN IDs default_cdn_ids = orderedSet([ @@ -358,7 +303,7 @@ def _extract_formats(self, channel_info, broadcast_no, aid): try: return self._extract_m3u8_formats( m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, - headers={'Referer': 'https://play.afreecatv.com/'}) + headers={'Referer': 'https://play.sooplive.co.kr/'}) except ExtractorError as e: if attempt == len(cdn_ids): raise @@ -374,7 +319,13 @@ def _real_extract(self, url): broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcast_no = channel_info.get('BNO') or broadcast_no if not broadcast_no: - raise UserNotLive(video_id=broadcaster_id) + result = channel_info.get('RESULT') + if result == 0: + raise UserNotLive(video_id=broadcaster_id) + elif result == -6: + self.raise_login_required( + 'This channel is streaming for subscribers only', method='password') + raise ExtractorError('Unable to extract broadcast number') password = self.get_param('videopassword') if channel_info.get('BPWD') == 'Y' and password is None: @@ -403,7 +354,7 @@ def _real_extract(self, url): formats = self._extract_formats(channel_info, broadcast_no, aid) station_info = traverse_obj(self._download_json( - 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, + 'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, 'Downloading channel metadata', 'Unable to download channel metadata', query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} @@ -419,11 +370,11 @@ def _real_extract(self, url): } -class AfreecaTVUserIE(InfoExtractor): - IE_NAME = 'afreecatv:user' - _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P[^/]+)/vods/?(?P[^/]+)?' +class AfreecaTVUserIE(AfreecaTVBaseIE): + IE_NAME = 'soop:user' + _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)/vods/?(?P[^/?#]+)?' 
_TESTS = [{ - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -431,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 218, }, { - 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', 'info_dict': { '_type': 'playlist', 'id': 'parang1995', @@ -439,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 997, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -447,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 221, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -459,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor): def _fetch_page(self, user_id, user_type, page): page += 1 - info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, note=f'Downloading {user_type} video page {page}') for item in info['data']: yield self.url_result( - f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) def _real_extract(self, url): user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') From b8635c1d4779da195e71aa281f73aaad702c935e Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 22 Oct 2024 16:46:53 +1300 Subject: [PATCH 29/44] [ie/youtube] Support logging in with OAuth (#11001) See: 
https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 307 ++++++++++++++++++++++++++++++------ 1 file changed, 262 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 728cb06966..8cbc00f37c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -22,7 +22,7 @@ from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper from ..jsinterp import JSInterpreter -from ..networking.exceptions import HTTPError, network_exceptions +from ..networking.exceptions import HTTPError, TransportError, network_exceptions from ..utils import ( NO_DEFAULT, ExtractorError, @@ -55,6 +55,7 @@ str_or_none, str_to_int, strftime_or_none, + time_seconds, traverse_obj, try_call, try_get, @@ -515,6 +516,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' + _NETRC_MACHINE = 'youtube' + def ucid_or_none(self, ucid): return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) @@ -573,9 +576,213 @@ def _real_initialize(self): self._initialize_consent() self._check_login_required() + def _perform_login(self, username, password): + auth_type, _, user = (username or '').partition('+') + + if auth_type != 'oauth': + raise ExtractorError(self._youtube_login_hint, expected=True) + + self._initialize_oauth(user, password) + + ''' + OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv). 
+ + For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see: + - https://developers.google.com/identity/protocols/oauth2/limited-input-device + - https://accounts.google.com/.well-known/openid-configuration + - https://www.rfc-editor.org/rfc/rfc8628 + - https://www.rfc-editor.org/rfc/rfc6749 + + Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2, + which applies some modifications to the response (such as returning errors as 200 OK). + Since the client works with the standard API, we will use that as it is well-documented. + ''' + + _OAUTH_PROFILE = None + _OAUTH_ACCESS_TOKEN_CACHE = {} + _OAUTH_DISPLAY_ID = 'oauth' + + # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv + _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' + _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT' + _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content' + + # From https://accounts.google.com/.well-known/openid-configuration + # Technically, these should be fetched dynamically and not hard-coded. + # However, as these endpoints rarely change, we can risk saving an extra request for every invocation. 
+ _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code' + _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token' + + @property + def _oauth_cache_key(self): + return f'oauth_refresh_token_{self._OAUTH_PROFILE}' + + def _read_oauth_error_response(self, response): + return traverse_obj( + self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False), + ({json.loads}, 'error', {str})) + + def _set_oauth_info(self, token_response): + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({ + 'access_token': token_response['access_token'], + 'token_type': token_response['token_type'], + 'expiry': time_seconds( + seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10), + }) + refresh_token = traverse_obj(token_response, ('refresh_token', {str})) + if refresh_token: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token + + def _initialize_oauth(self, user, refresh_token): + self._OAUTH_PROFILE = user or 'default' + + if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE: + self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"') + return + + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {} + + if refresh_token: + refresh_token = refresh_token.strip('\'') or None + + # Allow refresh token passed to initialize cache + if refresh_token: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) + + refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key) + if refresh_token: + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token + try: + token_response = self._refresh_token(refresh_token) + 
except ExtractorError as e: + error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '') + self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}') + token_response = self._oauth_authorize + else: + token_response = self._oauth_authorize + + self._set_oauth_info(token_response) + self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"') + + def _refresh_token(self, refresh_token): + try: + token_response = self._download_json( + self._OAUTH_TOKEN_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note='Refreshing access token', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'client_secret': self._OAUTH_CLIENT_SECRET, + 'refresh_token': refresh_token, + 'grant_type': 'refresh_token', + }).encode(), + headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError): + error = self._read_oauth_error_response(e.cause.response) + if error == 'invalid_grant': + # RFC6749 § 5.2 + raise ExtractorError( + 'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', + expected=True, video_id=self._OAUTH_DISPLAY_ID) + raise ExtractorError( + f'Failed to refresh access token: Authorization server returned error {error}', + video_id=self._OAUTH_DISPLAY_ID) + raise + return token_response + + @property + def _oauth_authorize(self): + code_response = self._download_json( + self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note='Initializing authorization flow', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'scope': self._OAUTH_SCOPE, + }).encode(), + headers={'Content-Type': 'application/json'}) + + verification_url = traverse_obj(code_response, ('verification_url', {str})) + user_code = traverse_obj(code_response, ('user_code', {str})) + if not verification_url or not user_code: + raise ExtractorError( + 'Authorization server did not provide 
verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID) + + # note: The whitespace is intentional + self.to_screen( + f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, ' + f'go to {verification_url} and enter code {user_code}') + + # RFC8628 § 3.5: default poll interval is 5 seconds if not provided + poll_interval = traverse_obj(code_response, ('interval', {int}), default=5) + + for retry in self.RetryManager(): + while True: + try: + token_response = self._download_json( + self._OAUTH_TOKEN_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note=False, + errnote='Failed to request access token', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'client_secret': self._OAUTH_CLIENT_SECRET, + 'device_code': code_response['device_code'], + 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code', + }).encode(), + headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, TransportError): + retry.error = e + break + elif isinstance(e.cause, HTTPError): + error = self._read_oauth_error_response(e.cause.response) + if not error: + retry.error = e + break + + if error == 'authorization_pending': + time.sleep(poll_interval) + continue + elif error == 'expired_token': + raise ExtractorError( + 'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID) + elif error == 'access_denied': + raise ExtractorError( + 'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID) + elif error == 'slow_down': + # RFC8628 § 3.5: add 5 seconds to the poll interval + poll_interval += 5 + time.sleep(poll_interval) + continue + else: + raise ExtractorError( + f'Authorization server returned an error when fetching access token: {error}', + video_id=self._OAUTH_DISPLAY_ID) + raise + + return token_response + + def _update_oauth(self): + token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) + if token is None or token['expiry'] > time.time(): + 
return + + self._set_oauth_info(self._refresh_token(token['refresh_token'])) + + @property + def _youtube_login_hint(self): + return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' + f'or else u{self._login_hint(method="cookies")[1:]}. ' + 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' + 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies') + def _check_login_required(self): - if self._LOGIN_REQUIRED and not self._cookies_passed: - self.raise_login_required('Login details are needed to download this content', method='cookies') + if self._LOGIN_REQUIRED and not self.is_authenticated: + self.raise_login_required( + f'Login details are needed to download this content. {self._youtube_login_hint}', method=None) _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' @@ -674,17 +881,6 @@ def _extract_session_index(*data): if session_index is not None: return session_index - # Deprecated? 
- def _extract_identity_token(self, ytcfg=None, webpage=None): - if ytcfg: - token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) - if token: - return token - if webpage: - return self._search_regex( - r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, - 'identity token', default=None, fatal=False) - def _data_sync_id_to_delegated_session_id(self, data_sync_id): if not data_sync_id: return @@ -731,7 +927,7 @@ def _extract_visitor_data(self, *args): @functools.cached_property def is_authenticated(self): - return bool(self._generate_sapisidhash_header()) + return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) def extract_ytcfg(self, video_id, webpage): if not webpage: @@ -741,21 +937,21 @@ def extract_ytcfg(self, video_id, webpage): r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}'), video_id, fatal=False) or {} - def generate_api_headers( - self, *, ytcfg=None, account_syncid=None, session_index=None, - visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): + def _generate_oauth_headers(self): + self._update_oauth() + oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) + if not oauth_token: + return {} - origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) - headers = { - 'X-YouTube-Client-Name': str( - self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), - 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), - 'Origin': origin, - 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), - 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), - 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), - 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), + return { + 'Authorization': f'{oauth_token["token_type"]} 
{oauth_token["access_token"]}', } + + def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): + headers = {} + account_syncid = account_syncid or self._extract_account_syncid(ytcfg) + if account_syncid: + headers['X-Goog-PageId'] = account_syncid if session_index is None: session_index = self._extract_session_index(ytcfg) if account_syncid or session_index is not None: @@ -765,8 +961,29 @@ def generate_api_headers( if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin + + return headers + + def generate_api_headers( + self, *, ytcfg=None, account_syncid=None, session_index=None, + visitor_data=None, api_hostname=None, default_client='web', **kwargs): + + origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) + headers = { + 'X-YouTube-Client-Name': str( + self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), + 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), + 'Origin': origin, + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), + **self._generate_oauth_headers(), + **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), + } return filter_dict(headers) + def _generate_webpage_headers(self): + return self._generate_oauth_headers() + def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', @@ -776,7 +993,8 @@ def _download_ytcfg(self, client, video_id): if not url: return {} webpage = self._download_webpage( - url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', + 
headers=self._generate_webpage_headers()) return self.extract_ytcfg(video_id, webpage) or {} @staticmethod @@ -3041,7 +3259,8 @@ def _load_player(self, video_id, player_url, fatal=True): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote=f'Download of {player_url} failed') + errnote=f'Download of {player_url} failed', + headers=self._generate_webpage_headers()) if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3324,7 +3543,8 @@ def _mark_watched(self, video_id, player_responses): self._download_webpage( url, video_id, f'Marking {label}watched', - 'Unable to mark watched', fatal=False) + 'Unable to mark watched', fatal=False, + headers=self._generate_webpage_headers()) @classmethod def _extract_from_webpage(cls, url, webpage): @@ -4305,7 +4525,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): if pp: query['pp'] = pp webpage = self._download_webpage( - webpage_url, video_id, fatal=False, query=query) + webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() @@ -5593,7 +5813,7 @@ def _extract_webpage(self, url, item_id, fatal=True): webpage, data = None, None for retry in self.RetryManager(fatal=fatal): try: - webpage = self._download_webpage(url, item_id, note='Downloading webpage') + webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): @@ -6967,7 +7187,7 @@ def _real_extract(self, url, smuggled_data): raise ExtractorError('Unable to recognize tab page') -class YoutubePlaylistIE(InfoExtractor): +class YoutubePlaylistIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube playlists' _VALID_URL = r'''(?x)(?: 
(?:https?://)? @@ -7081,7 +7301,7 @@ def _real_extract(self, url): return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id) -class YoutubeYtBeIE(InfoExtractor): +class YoutubeYtBeIE(YoutubeBaseInfoExtractor): IE_DESC = 'youtu.be' _VALID_URL = rf'https?://youtu\.be/(?P[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _TESTS = [{ @@ -7132,7 +7352,7 @@ def _real_extract(self, url): }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id) -class YoutubeLivestreamEmbedIE(InfoExtractor): +class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube livestream embeds' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P[^&#]+)' _TESTS = [{ @@ -7147,7 +7367,7 @@ def _real_extract(self, url): ie=YoutubeTabIE.ie_key(), video_id=channel_id) -class YoutubeYtUserIE(InfoExtractor): +class YoutubeYtUserIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube user videos; "ytuser:" prefix' IE_NAME = 'youtube:user' _VALID_URL = r'ytuser:(?P.+)' @@ -7434,7 +7654,7 @@ def _real_extract(self, url): return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title) -class YoutubeFeedsInfoExtractor(InfoExtractor): +class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): """ Base class for feed extractors Subclasses must re-define the _FEED_NAME property. 
@@ -7442,9 +7662,6 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = True _FEED_NAME = 'feeds' - def _real_initialize(self): - YoutubeBaseInfoExtractor._check_login_required(self) - @classproperty def IE_NAME(cls): return f'youtube:{cls._FEED_NAME}' @@ -7454,7 +7671,7 @@ def _real_extract(self, url): f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key()) -class YoutubeWatchLaterIE(InfoExtractor): +class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)' _VALID_URL = r':ytwatchlater' @@ -7508,7 +7725,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): }] -class YoutubeShortsAudioPivotIE(InfoExtractor): +class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)' IE_NAME = 'youtube:shorts:pivot:audio' _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P[\w-]{11})/shorts' @@ -7532,7 +7749,7 @@ def _real_extract(self, url): ie=YoutubeTabIE) -class YoutubeTruncatedURLIE(InfoExtractor): +class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) @@ -7691,7 +7908,7 @@ def _real_extract(self, url): return self.url_result(redirect_url) -class YoutubeTruncatedIDIE(InfoExtractor): +class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:truncated_id' IE_DESC = False # Do not list _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P[0-9A-Za-z_-]{1,10})$' From e68b4c19af122876561a41f2dd8093fae7b417c7 Mon Sep 17 00:00:00 2001 From: Allen <64094914+allendema@users.noreply.github.com> Date: Tue, 22 Oct 2024 05:54:41 +0200 Subject: [PATCH 30/44] [ie/tubitv] Strip extra whitespace from titles (#10795) Closes #10794 Authored by: allendema --- yt_dlp/extractor/tubitv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 85eb3a211c..694a92fcd4 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -6,6 +6,7 @@ ExtractorError, int_or_none, js_to_json, + strip_or_none, traverse_obj, url_or_none, urlencode_postdata, @@ -132,12 +133,12 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': title, + 'title': strip_or_none(title), 'formats': formats, 'subtitles': subtitles, 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'episode': episode_title, + 'episode': strip_or_none(episode_title), **traverse_obj(video_data, { 'description': ('description', {str}), 'duration': ('duration', {int_or_none}), From a886cf3e900f4a2ec00af705f883539269545609 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 04:20:20 +0000 Subject: [PATCH 31/44] [build] Migrate `py2exe` builds to `win_exe` (#11256) This commit removes py2exe support Closes #10087 Authored by: bashonly --- .github/workflows/build.yml | 12 ++------ README.md | 15 +--------- bundle/py2exe.py | 59 ------------------------------------- pyproject.toml | 3 -- yt_dlp/update.py | 11 ++----- 5 files changed, 7 insertions(+), 93 deletions(-) delete mode 100755 bundle/py2exe.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 495d3c6306..64227d9740 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -419,22 +419,16 @@ jobs: run: | python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Install Requirements (py2exe) + - name: Add migration executable for py2exe run: | - python devscripts/install_deps.py --include py2exe - - name: Build (py2exe) - run: | - python -m bundle.py2exe - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - Move-Item 
./dist/yt-dlp_real.exe ./dist/yt-dlp.exe + Copy-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | - foreach ($name in @("yt-dlp","yt-dlp_min")) { + foreach ($name in @("yt-dlp")) { Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" $version = & "./dist/${name}.exe" --version & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 diff --git a/README.md b/README.md index fc38a529a7..6bd632dd7d 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,6 @@ #### Alternatives File|Description :---|:--- [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`
([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary @@ -260,18 +259,6 @@ ### Platform-independent Binary (UNIX) You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) -### Standalone Py2Exe Builds (Windows) - -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. - -If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: - -``` -py devscripts/install_deps.py --include py2exe -py devscripts/make_lazy_extractors.py -py -m bundle.py2exe -``` - ### Related scripts * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. @@ -1933,7 +1920,7 @@ ## Installing Plugins * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames. * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. - * Note: This does not apply for Pyinstaller/py2exe builds. + * Note: This does not apply for Pyinstaller builds. `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. 
diff --git a/bundle/py2exe.py b/bundle/py2exe.py deleted file mode 100755 index 5b7f4883bc..0000000000 --- a/bundle/py2exe.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import warnings - -from py2exe import freeze - -from devscripts.utils import read_version - -VERSION = read_version() - - -def main(): - warnings.warn( - 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' - 'It is recommended to run "pyinst.py" to build using pyinstaller instead') - - freeze( - console=[{ - 'script': './yt_dlp/__main__.py', - 'dest_base': 'yt-dlp', - 'icon_resources': [(1, 'devscripts/logo.ico')], - }], - version_info={ - 'version': VERSION, - 'description': 'A feature-rich command-line audio/video downloader', - 'comments': 'Official repository: ', - 'product_name': 'yt-dlp', - 'product_version': VERSION, - }, - options={ - 'bundle_files': 0, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': './dist', - 'excludes': [ - # py2exe cannot import Crypto - 'Crypto', - 'Cryptodome', - # requests >=2.32.0 breaks py2exe builds due to certifi dependency - 'requests', - 'urllib3', - ], - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], - # Modules that are only imported dynamically must be added here - 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', - 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], - }, - zipfile=None, - ) - - -if __name__ == '__main__': - main() diff --git a/pyproject.toml b/pyproject.toml index 200a9c99ae..be81c265cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,9 +85,6 @@ test = [ pyinstaller = [ "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 ] -py2exe = [ - "py2exe>=0.12", -] [project.urls] Documentation = "https://github.com/yt-dlp/yt-dlp#readme" diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 4cf3bdc320..0172acfd63 100644 --- 
a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -103,7 +103,6 @@ def current_git_head(): _FILE_SUFFIXES = { 'zip': '', - 'py2exe': '_min.exe', 'win_exe': '.exe', 'win_x86_exe': '_x86.exe', 'darwin_exe': '_macos', @@ -117,6 +116,7 @@ def current_git_head(): **{variant: None for variant in _FILE_SUFFIXES}, # Updatable **{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release' for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()}, + 'py2exe': 'py2exe is no longer supported by yt-dlp; This executable cannot be updated', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update', 'other': 'You are using an unofficial build of yt-dlp; Build the executable again', @@ -152,15 +152,10 @@ def _get_system_deprecation(): variant = detect_variant() # Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2 - if variant in ('win_exe', 'win_x86_exe', 'py2exe'): + if variant in ('win_exe', 'win_x86_exe'): platform_name = platform.platform() if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')): return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG) - elif variant == 'py2exe': - return EXE_MSG_TMPL.format( - 'py2exe builds (yt-dlp_min.exe)', 'issues/10087', - 'In a future update you will be migrated to the PyInstaller-bundled executable. 
' - 'This will be done automatically; no action is required on your part') return None # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9 @@ -525,7 +520,7 @@ def update(self, update_info=NO_DEFAULT): return os.rename(old_filename, self.filename) variant = detect_variant() - if variant.startswith('win') or variant == 'py2exe': + if variant.startswith('win'): atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) elif old_filename: From 67adeb7bab00662ba55d473e405b301abb42fe61 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 22 Oct 2024 06:50:35 +0200 Subject: [PATCH 32/44] [cleanup] Misc (#11216) - Add Python 3.13 to CI, finalize 3.13 support - Remove Python 3.8 from CI in preparation for removing 3.8 support - Document that PyPy3.8 and PyPy3.9 are no longer supported - Usual documentation fixes and code cleanup Closes #8248, Closes #11146, Closes #11149, Closes #11211 Authored by: Grub4K, grqz, DTrombett, KarboniteKream, bashonly, mikkovedru, seproDev Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: DTrombett Co-authored-by: =?UTF-8?q?Klemen=20Ko=C5=A1ir?= Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: Mikko Vedru Co-authored-by: sepro --- .github/workflows/core.yml | 12 ++-- .github/workflows/download.yml | 6 +- .github/workflows/quick-test.yml | 6 +- CONTRIBUTING.md | 7 +- README.md | 106 +++++++++++++++-------------- devscripts/changelog_override.json | 10 +++ pyproject.toml | 5 +- setup.cfg | 2 +- yt_dlp/YoutubeDL.py | 20 ++---- yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/nexx.py | 2 +- yt_dlp/extractor/tubetugraz.py | 2 +- yt_dlp/extractor/ustream.py | 2 +- yt_dlp/extractor/veoh.py | 5 +- yt_dlp/extractor/youtube.py | 4 +- yt_dlp/options.py | 35 +++++----- yt_dlp/utils/traversal.py | 4 +- 17 files changed, 118 insertions(+), 114 deletions(-) diff --git 
a/.github/workflows/core.yml b/.github/workflows/core.yml index a5cb6c9707..9a4342a585 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -36,16 +36,20 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.8 is in quick-test - python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + # CPython 3.9 is in quick-test + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' + - os: windows-latest + python-version: '3.10' - os: windows-latest python-version: '3.12' - os: windows-latest - python-version: pypy-3.9 + python-version: '3.13' + - os: windows-latest + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 7256804d93..6849fba9b6 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' - os: windows-latest - python-version: pypy-3.9 + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index cce7cbac1e..1a32bbfe31 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,10 +10,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install test 
requirements run: python3 ./devscripts/install_deps.py -o --include test - name: Run tests @@ -29,7 +29,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dev dependencies run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dbae6476f6..f1646e5952 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -233,7 +233,7 @@ ## Adding support for a new site # * MD5 checksum; start the string with 'md5:', e.g. # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', # * A regular expression; start the string with 're:', e.g. - # 'thumbnail': r're:^https?://.*\.jpg$', + # 'thumbnail': r're:https?://.*\.jpg$', # * A count of elements in a list; start the string with 'count:', e.g. # 'tags': 'count:10', # * Any Python type, e.g. @@ -268,7 +268,7 @@ ## Adding support for a new site You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.8 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. 
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell @@ -302,10 +302,9 @@ ### Mandatory and optional metafields For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - - `title` (media title) - `url` (media download URL) or `formats` -The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal. +The aforementioned metadata fields are the critical data without which extraction does not make any sense. If any of them fail to be extracted, then the extractor is considered broken. All other metadata extraction should be completely non-fatal. For pornographic sites, appropriate `age_limit` must also be returned. 
diff --git a/README.md b/README.md index 6bd632dd7d..46fff07df2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") +[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPI") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") [![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") @@ -81,7 +81,7 @@ # INSTALLATION [![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) [![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) +[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) [![Source 
Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) [![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) @@ -172,11 +172,11 @@ # To install nightly with pip: ``` ## DEPENDENCIES -Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.8+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended @@ -438,10 +438,10 @@ ## Video Selection: E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --dateafter DATE Download only videos uploaded on or after - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a number or a @@ -726,16 +726,16 @@ ## Verbosity and Simulation Options: used. This option can be used multiple times --print-to-file [WHEN:]TEMPLATE FILE Append given template to the file. The - values of WHEN and TEMPLATE are same as that - of --print. FILE uses the same syntax as the - output template. This option can be used - multiple times + values of WHEN and TEMPLATE are the same as + that of --print. FILE uses the same syntax + as the output template. This option can be + used multiple times -j, --dump-json Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. 
See "OUTPUT TEMPLATE" for a description of available keys -J, --dump-single-json Quiet, but print JSON information for each - url or infojson passed. Simulate unless + URL or infojson passed. Simulate unless --no-simulate is used. If the URL refers to a playlist, the whole playlist information is dumped in a single line @@ -810,9 +810,9 @@ ## Video Format Options: --no-audio-multistreams Only one audio stream is downloaded for each output file (default) --prefer-free-formats Prefer video formats with free containers - over non-free ones of same quality. Use with - "-S ext" to strictly prefer free containers - irrespective of quality + over non-free ones of the same quality. Use + with "-S ext" to strictly prefer free + containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) --check-formats Make sure formats are selected only from @@ -837,15 +837,17 @@ ## Subtitle Options: (default) (Alias: --no-write-automatic-subs) --list-subs List available subtitles of each video. Simulate unless --no-simulate is used - --sub-format FORMAT Subtitle format; accepts formats preference, - e.g. "srt" or "ass/srt/best" + --sub-format FORMAT Subtitle format; accepts formats preference + separated by "/", e.g. "srt" or "ass/srt/best" --sub-langs LANGS Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. - --sub-langs "en.*,ja". You can prefix the - language code with a "-" to exclude it from - the requested languages, e.g. --sub-langs - all,-live_chat. Use --list-subs for a list - of available language tags + --sub-langs "en.*,ja" (where "en.*" is a + regex pattern that matches "en" followed by + 0 or more of any character). You can prefix + the language code with a "-" to exclude it + from the requested languages, e.g. --sub- + langs all,-live_chat. 
Use --list-subs for a + list of available language tags ## Authentication Options: -u, --username USERNAME Login with this account ID @@ -893,9 +895,9 @@ ## Post-Processing Options: necessary (currently supported: avi, flv, gif, mkv, mov, mp4, webm, aac, aiff, alac, flac, m4a, mka, mp3, ogg, opus, vorbis, - wav). If target container does not support - the video/audio codec, remuxing will fail. - You can specify multiple rules; e.g. + wav). If the target container does not + support the video/audio codec, remuxing will + fail. You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -963,29 +965,29 @@ ## Post-Processing Options: are the same as that of --use-postprocessor (default: pre_process) --xattrs Write metadata to the video file's xattrs - (using dublin core and xdg standards) + (using Dublin Core and XDG standards) --concat-playlist POLICY Concatenate videos in a playlist. One of "never", "always", or "multi_video" (default; only when the videos form a single - show). All the video files must have same - codecs and number of streams to be - concatable. The "pl_video:" prefix can be + show). All the video files must have the + same codecs and number of streams to be + concatenable. The "pl_video:" prefix can be used with "--paths" and "--output" to set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. 
One of never (do nothing), warn (only emit a warning), detect_or_warn (the - default; fix file if we can, warn - otherwise), force (try fixing even if file - already exists) + default; fix the file if we can, warn + otherwise), force (try fixing even if the + file already exists) --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with when to execute it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor (default: - after_move). Same syntax as the output + after_move). The same syntax as the output template can be used to pass any field as arguments to the command. If no fields are passed, %(filepath,_filename|)q is appended @@ -1023,7 +1025,7 @@ ## Post-Processing Options: --no-force-keyframes-at-cuts Do not force keyframes around the chapters when cutting/splitting (default) --use-postprocessor NAME[:ARGS] - The (case sensitive) name of plugin + The (case-sensitive) name of plugin postprocessors to be enabled, and (optionally) arguments to be passed to it, separated by a colon ":". ARGS are a @@ -1036,8 +1038,8 @@ ## Post-Processing Options: --print/--output), "before_dl" (before each video download), "post_process" (after each video download; default), "after_move" - (after moving video file to its final - locations), "after_video" (after downloading + (after moving the video file to its final + location), "after_video" (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used multiple times to add different @@ -1055,7 +1057,7 @@ ## SponsorBlock Options: music_offtopic, poi_highlight, chapter, all and default (=all). You can prefix the category with a "-" to exclude it. See [1] - for description of the categories. E.g. + for descriptions of the categories. E.g. 
--sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1087,7 +1089,7 @@ ## Extractor Options: (Alias: --no-allow-dynamic-mpd) --hls-split-discontinuity Split HLS playlists to different formats at discontinuities such as ad breaks - --no-hls-split-discontinuity Do not split HLS playlists to different + --no-hls-split-discontinuity Do not split HLS playlists into different formats at discontinuities such as ad breaks (default) --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. @@ -1097,7 +1099,7 @@ ## Extractor Options: # CONFIGURATION -You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: +You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations: 1. **Main Configuration**: * The file given to `--config-location` @@ -1142,7 +1144,7 @@ # Save all videos under YouTube directory in your home directory -o ~/YouTube/%(title)s.%(ext)s ``` -**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. +**Note**: Options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. 
For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. @@ -1176,13 +1178,13 @@ ### Authentication with netrc E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg` ``` -yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc +yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' 'https://www.youtube.com/watch?v=BaW_jenozKc' ``` ### Notes about environment variables * Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but is always shown as `${VARIABLE}` in this documentation -* yt-dlp also allow using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` +* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. 
`--output`, `--config-location` * If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache` * On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise * On Windows, `${USERPROFILE}` generally points to `C:\Users\` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming` @@ -1263,7 +1265,7 @@ # OUTPUT TEMPLATE - `like_count` (numeric): Number of positive ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video - `repost_count` (numeric): Number of reposts of the video - - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage + - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used) - `age_limit` (numeric): Age restriction for the video (years) - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed) @@ -1293,7 +1295,7 @@ # OUTPUT TEMPLATE - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL - - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) + - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries) - `categories` (list): List of categories the video belongs to - `tags` (list): List of tags assigned to the video - `cast` (list): List of cast members @@ -1370,7 +1372,7 @@ # OUTPUT TEMPLATE **Tip**: Look at the `-j` output to identify which fields are available for the particular URL -For numeric sequences you can use [numeric related 
formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. +For numeric sequences, you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. Output templates can also contain arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. @@ -1412,7 +1414,7 @@ # Download entire series season keeping each series and each season in separate # Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext" # and put all temporary files in "C:\MyVideos\tmp" -$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs +$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs # Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext" $ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs @@ -1630,11 +1632,11 @@ # Download the best video (that also has audio) but no bigger than 50 MB, # or the worst video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b[filesize<50M] / w" -# Download largest video (that also has audio) but no bigger than 50 MB, +# Download the largest video (that also has audio) but no bigger than 50 MB, # or the smallest video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b" -S "filesize:50M" -# Download best video 
(that also has audio) that is closest in size to 50 MB +# Download the best video (that also has audio) that is closest in size to 50 MB $ yt-dlp -f "b" -S "filesize~50M" @@ -1690,7 +1692,7 @@ # MODIFYING METADATA The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. -Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. +Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. This option also has a few special uses: @@ -1765,7 +1767,7 @@ # EXTRACTOR ARGUMENTS #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. 
See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. 
`youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -2198,7 +2200,7 @@ ### Differences in default behavior Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: -* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) +* yt-dlp supports only [Python 3.8+](## "Windows 7"), and will remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations @@ -2274,8 +2276,8 @@ #### Redundant options --min-views COUNT --match-filters "view_count >=? COUNT" --max-views COUNT --match-filters "view_count <=? 
COUNT" --break-on-reject Use --break-match-filters - --user-agent UA --add-header "User-Agent:UA" - --referer URL --add-header "Referer:URL" + --user-agent UA --add-headers "User-Agent:UA" + --referer URL --add-headers "Referer:URL" --playlist-start NUMBER -I NUMBER: --playlist-end NUMBER -I :NUMBER --playlist-reverse -I ::-1 diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e7f553a5f2..3d8fe53a52 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -196,5 +196,15 @@ "when": "b31b81d85f00601710d4fac590c3e4efb4133283", "short": "[ci] Rerun failed tests (#11143)", "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)" + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). 
[Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" } ] diff --git a/pyproject.toml b/pyproject.toml index be81c265cb..5439db1df2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -169,13 +170,11 @@ run-cov = "echo Code coverage not implemented && exit 1" [[tool.hatch.envs.hatch-test.matrix]] python = [ - "3.8", "3.9", "3.10", "3.11", "3.12", - "pypy3.8", - "pypy3.9", + "3.13", "pypy3.10", ] diff --git a/setup.cfg b/setup.cfg index 340cc3b4d9..e7f3e2b955 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,7 @@ remove-unused-variables = true [tox:tox] skipsdist = true -envlist = py{38,39,310,311,312},pypy{38,39,310} +envlist = py{39,310,311,312,313},pypy310 skip_missing_interpreters = true [testenv] # tox diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eea1065036..48185b7693 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -154,7 +154,6 @@ try_get, url_basename, variadic, - version_tuple, windows_enable_vt_mode, write_json_file, write_string, @@ -251,7 +250,7 @@ class YoutubeDL: format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. prefer_free_formats: Whether to prefer video formats with free containers - over non-free ones of same quality. + over non-free ones of the same quality. allow_multiple_video_streams: Allow multiple video streams to be merged into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged @@ -285,7 +284,7 @@ class YoutubeDL: rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. 
logtostderr: Print everything to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. + consoletitle: Display progress in the console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove internal metadata from the infojson @@ -513,7 +512,7 @@ class YoutubeDL: The following options are used by the extractors: extractor_retries: Number of times to retry for known errors (default: 3) dynamic_mpd: Whether to process dynamic DASH manifests (default: True) - hls_split_discontinuity: Split HLS playlists to different formats at + hls_split_discontinuity: Split HLS playlists into different formats at discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. @@ -553,7 +552,7 @@ class YoutubeDL: include_ads: - Doesn't work Download ads as well call_home: - Not implemented - Boolean, true iff we are allowed to contact the + Boolean, true if we are allowed to contact the yt-dlp servers for debugging. post_hooks: - Register a custom postprocessor A list of functions that get called as the final step @@ -4089,17 +4088,6 @@ def get_encoding(stream): if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') - # Not implemented - if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug(f'Public IP address: {ipaddr}') - latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode() - if version_tuple(latest_version) > version_tuple(__version__): - self.report_warning( - f'You are using an outdated version (newest version: {latest_version})! 
' - 'See https://yt-dl.org/update if you need help updating.') - @functools.cached_property def proxies(self): """Global proxy configuration""" diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 812fbfa9f9..795105b7d8 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -333,7 +333,7 @@ class InfoExtractor: like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video repost_count: Number of reposts of the video - average_rating: Average rating give by users, the scale used depends on the webpage + average_rating: Average rating given by users, the scale used depends on the webpage comment_count: Number of comments on the video comments: A list of comments, each with one or more of the following properties (all but one of text or html optional): @@ -520,7 +520,7 @@ class InfoExtractor: or _extract_from_webpage as necessary. While these are normally classmethods, _extract_from_webpage is allowed to be an instance method. - _extract_from_webpage may raise self.StopExtraction() to stop further + _extract_from_webpage may raise self.StopExtraction to stop further processing of the webpage and obtain exclusive rights to it. This is useful when the extractor cannot reliably be matched using just the URL, e.g. invidious/peertube instances diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index cd32892fa0..ee1bc281c6 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -371,7 +371,7 @@ def find_video(result): # not all videos work via arc, e.g. 
nexx:741:1269984 if not video: # Reverse engineered from JS code (see getDeviceID function) - device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}' + device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}' result = self._call_api(domain_id, 'session/init', video_id, data={ 'nxp_devh': device_id, diff --git a/yt_dlp/extractor/tubetugraz.py b/yt_dlp/extractor/tubetugraz.py index d5dbf007b1..805e2686f7 100644 --- a/yt_dlp/extractor/tubetugraz.py +++ b/yt_dlp/extractor/tubetugraz.py @@ -236,7 +236,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): }, }, ], - 'min_playlist_count': 4, + 'playlist_mincount': 4, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 33cf8f454d..0fdf8f7484 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -73,7 +73,7 @@ def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] - rnd = random.randrange + rnd = lambda x: random.randrange(int(x)) if not extra_note: extra_note = '' diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index dc1bf96ec6..aac768f3c6 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -8,7 +8,8 @@ int_or_none, parse_duration, qualities, - try_get, + remove_start, + strip_or_none, ) @@ -108,7 +109,7 @@ def _real_extract(self, url): categories = metadata.get('categoryPath') if not categories: - category = try_get(video, lambda x: x['category'].strip().removeprefix('category_')) + category = remove_start(strip_or_none(video.get('category')), 'category_') categories = [category] if category else None tags = video.get('tags') diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 8cbc00f37c..5148e82619 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -7792,9 +7792,9 @@ def _real_extract(self, url): raise 
ExtractorError( 'Did you forget to quote the URL? Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' - 'like youtube-dl ' + 'like yt-dlp ' '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' - ' or simply youtube-dl BaW_jenozKc .', + ' or simply yt-dlp BaW_jenozKc .', expected=True) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c3a647da77..c4d2a72743 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -631,13 +631,13 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): metavar='DATE', dest='datebefore', default=None, help=( 'Download only videos uploaded on or before this date. ' - 'The date formats accepted is the same as --date')) + 'The date formats accepted are the same as --date')) selection.add_option( '--dateafter', metavar='DATE', dest='dateafter', default=None, help=( 'Download only videos uploaded on or after this date. ' - 'The date formats accepted is the same as --date')) + 'The date formats accepted are the same as --date')) selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, @@ -833,7 +833,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, help=( - 'Prefer video formats with free containers over non-free ones of same quality. ' + 'Prefer video formats with free containers over non-free ones of the same quality. ' 'Use with "-S ext" to strictly prefer free containers irrespective of quality')) video_format.add_option( '--no-prefer-free-formats', @@ -907,13 +907,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"') + help='Subtitle format; accepts formats preference separated by "/", e.g. 
"srt" or "ass/srt/best"') subtitles.add_option( '--sub-langs', '--srt-langs', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_list_from_options_callback, help=( - 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". ' + 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja" ' + '(where "en.*" is a regex pattern that matches "en" followed by 0 or more of any character). ' 'You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. ' 'Use --list-subs for a list of available language tags')) @@ -1182,7 +1183,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '--print-to-file', metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'), help=( - 'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. ' + 'Append given template to the file. The values of WHEN and TEMPLATE are the same as that of --print. ' 'FILE uses the same syntax as the output template. This option can be used multiple times')) verbosity.add_option( '-g', '--get-url', @@ -1226,7 +1227,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '-J', '--dump-single-json', action='store_true', dest='dump_single_json', default=False, help=( - 'Quiet, but print JSON information for each url or infojson passed. Simulate unless --no-simulate is used. ' + 'Quiet, but print JSON information for each URL or infojson passed. Simulate unless --no-simulate is used. 
' 'If the URL refers to a playlist, the whole playlist information is dumped in a single line')) verbosity.add_option( '--print-json', @@ -1570,7 +1571,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help=( 'Remux the video into another container if necessary ' f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). ' - 'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; ' + 'If the target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; ' 'e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv')) postproc.add_option( '--recode-video', @@ -1676,7 +1677,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): postproc.add_option( '--xattrs', '--xattr', action='store_true', dest='xattrs', default=False, - help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + help='Write metadata to the video file\'s xattrs (using Dublin Core and XDG standards)') postproc.add_option( '--concat-playlist', metavar='POLICY', dest='concat_playlist', default='multi_video', @@ -1684,7 +1685,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help=( 'Concatenate videos in a playlist. One of "never", "always", or ' '"multi_video" (default; only when the videos form a single show). ' - 'All the video files must have same codecs and number of streams to be concatable. ' + 'All the video files must have the same codecs and number of streams to be concatenable. ' 'The "pl_video:" prefix can be used with "--paths" and "--output" to ' 'set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details')) postproc.add_option( @@ -1694,8 +1695,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help=( 'Automatically correct known faults of the file. 
' 'One of never (do nothing), warn (only emit a warning), ' - 'detect_or_warn (the default; fix file if we can, warn otherwise), ' - 'force (try fixing even if file already exists)')) + 'detect_or_warn (the default; fix the file if we can, warn otherwise), ' + 'force (try fixing even if the file already exists)')) postproc.add_option( '--prefer-avconv', '--no-prefer-ffmpeg', action='store_false', dest='prefer_ffmpeg', @@ -1714,7 +1715,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help=( 'Execute a command, optionally prefixed with when to execute it, separated by a ":". ' 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). ' - 'Same syntax as the output template can be used to pass any field as arguments to the command. ' + 'The same syntax as the output template can be used to pass any field as arguments to the command. ' 'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. ' 'This option can be used multiple times')) postproc.add_option( @@ -1785,14 +1786,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'delim': None, 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1))), }, help=( - 'The (case sensitive) name of plugin postprocessors to be enabled, ' + 'The (case-sensitive) name of plugin postprocessors to be enabled, ' 'and (optionally) arguments to be passed to it, separated by a colon ":". ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. 
' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' '"video" (after --format; before --print/--output), "before_dl" (before each video download), ' '"post_process" (after each video download; default), ' - '"after_move" (after moving video file to its final locations), ' + '"after_move" (after moving the video file to its final location), ' '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' 'This option can be used multiple times to add different postprocessors')) @@ -1809,7 +1810,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): }, help=( 'SponsorBlock categories to create chapters for, separated by commas. ' f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). ' - 'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. ' + 'You can prefix the category with a "-" to exclude it. See [1] for descriptions of the categories. ' 'E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories')) sponsorblock.add_option( '--sponsorblock-remove', metavar='CATS', @@ -1895,7 +1896,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): extractor.add_option( '--no-hls-split-discontinuity', dest='hls_split_discontinuity', action='store_false', - help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)') + help='Do not split HLS playlists into different formats at discontinuities such as ad breaks (default)') _extractor_arg_parser = lambda key, vals='': (key.strip().lower().replace('-', '_'), [ val.replace(r'\,', ',').strip() for val in re.split(r'(? 
Date: Tue, 22 Oct 2024 05:03:55 +0000 Subject: [PATCH 33/44] Release 2024.10.22 Created by: bashonly :ci skip all --- CONTRIBUTORS | 10 ++++++++++ Changelog.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++ supportedsites.md | 49 +++++++++++++++++++++++----------------------- yt_dlp/version.py | 6 +++--- 4 files changed, 88 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index bcdf6a0c24..949bc89c47 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -678,3 +678,13 @@ coreywright eric321 poyhen tetra-fox +444995 +63427083 +allendema +DarkZeros +DTrombett +imranh2 +KarboniteKream +mikkovedru +pktiuk +rubyevadestaxes diff --git a/Changelog.md b/Changelog.md index 10fd437fa1..0efccadd10 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,56 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.10.22 + +#### Important changes +- **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group** +If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255) +- **py2exe is no longer supported** +This release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/10087) + +#### Core changes +- [Add extractor helpers](https://github.com/yt-dlp/yt-dlp/commit/d710a6ca7c622705c0c8c8a3615916f531137d5d) ([#10653](https://github.com/yt-dlp/yt-dlp/issues/10653)) by [Grub4K](https://github.com/Grub4K) +- [Add option `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/0f593dca9fa995d88eb763170a932da61c8f24dc) ([#11277](https://github.com/yt-dlp/yt-dlp/issues/11277)) by [coletdjnz](https://github.com/coletdjnz), [imranh2](https://github.com/imranh2) +- **cookies**: [Fix compatibility for Python <=3.9 in traceback](https://github.com/yt-dlp/yt-dlp/commit/c5f0f58efd8c3930de8202c15a5c53b1b635bd51) by [Grub4K](https://github.com/Grub4K) +- **utils** + - `Popen`: [Reset PyInstaller environment](https://github.com/yt-dlp/yt-dlp/commit/fbc66e3ab35743cc847a21223c67d88bb463cd9c) ([#11258](https://github.com/yt-dlp/yt-dlp/issues/11258)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - `sanitize_path`: [Reimplement function](https://github.com/yt-dlp/yt-dlp/commit/85b87c991af25dcb35630fa94580fd418e78ee33) ([#11198](https://github.com/yt-dlp/yt-dlp/issues/11198)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **adobepass**: [Use newer user-agent for provider redirect request](https://github.com/yt-dlp/yt-dlp/commit/dcfeea4dd5e5686821350baa6c7767a011944867) ([#11250](https://github.com/yt-dlp/yt-dlp/issues/11250)) by [bashonly](https://github.com/bashonly) +- **afreecatv**: [Adapt extractors to new sooplive.co.kr domain](https://github.com/yt-dlp/yt-dlp/commit/46fe60ff19395698a87113b2944453779e04ab9d) ([#11266](https://github.com/yt-dlp/yt-dlp/issues/11266)) by [63427083](https://github.com/63427083), [bashonly](https://github.com/bashonly) +- **cda**: [Support folders](https://github.com/yt-dlp/yt-dlp/commit/c4d95f67ddc522297bb1fea875255cf94b34d595) ([#10786](https://github.com/yt-dlp/yt-dlp/issues/10786)) by 
[pktiuk](https://github.com/pktiuk) +- **cwtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9d43dcb2c5c38f443f84dfc126cd32720e1a1ad6) ([#11230](https://github.com/yt-dlp/yt-dlp/issues/11230)) by [bashonly](https://github.com/bashonly) +- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f4338714241b11d9d43768ae71a25f5e952f677d) ([#11141](https://github.com/yt-dlp/yt-dlp/issues/11141)) by [444995](https://github.com/444995) +- **funk**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8de431ec97a4b62b73df8f686b6e21e462775336) ([#11269](https://github.com/yt-dlp/yt-dlp/issues/11269)) by [seproDev](https://github.com/seproDev) +- **gem.cbc.ca**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/40054cb4a7ebbea30d335d444e6f58b298a3baa0) ([#11196](https://github.com/yt-dlp/yt-dlp/issues/11196)) by [DavidSkrundz](https://github.com/DavidSkrundz) +- **generic**: [Impersonate browser by default](https://github.com/yt-dlp/yt-dlp/commit/edfd095b1917701c5046bd51f9542897c17d41a7) ([#11206](https://github.com/yt-dlp/yt-dlp/issues/11206)) by [Grub4K](https://github.com/Grub4K) +- **imgur** + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/87408ccfd772ddf31a8323d8151c24f9577cbc9f) ([#11298](https://github.com/yt-dlp/yt-dlp/issues/11298)) by [seproDev](https://github.com/seproDev) + - [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5af774d7a36c00bea618c7047c9326532cd3f616) ([#11075](https://github.com/yt-dlp/yt-dlp/issues/11075)) by [Deer-Spangle](https://github.com/Deer-Spangle) +- **patreon**: campaign: [Stricter URL matching](https://github.com/yt-dlp/yt-dlp/commit/babb70960595e2146f06f81affc29c7e713e34e2) ([#11235](https://github.com/yt-dlp/yt-dlp/issues/11235)) by [bashonly](https://github.com/bashonly) +- **reddit**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/cba7868502f04175fecf9ab3e363296aee7ebec2) 
([#11202](https://github.com/yt-dlp/yt-dlp/issues/11202)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **substack**: [Resolve podcast file extensions](https://github.com/yt-dlp/yt-dlp/commit/3148c1822f66533998278f0a1cf842b9bea1526a) ([#11275](https://github.com/yt-dlp/yt-dlp/issues/11275)) by [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0b7ec08816fb196cd41d392f8331b4eb8366c4f8) ([#11142](https://github.com/yt-dlp/yt-dlp/issues/11142)) by [bashonly](https://github.com/bashonly), [DarkZeros](https://github.com/DarkZeros) +- **tubitv**: [Strip extra whitespace from titles](https://github.com/yt-dlp/yt-dlp/commit/e68b4c19af122876561a41f2dd8093fae7b417c7) ([#10795](https://github.com/yt-dlp/yt-dlp/issues/10795)) by [allendema](https://github.com/allendema) +- **tver**: [Support series URLs](https://github.com/yt-dlp/yt-dlp/commit/ceaea731b6e314dbbdfb2e358d7677785ed0b4fc) ([#9507](https://github.com/yt-dlp/yt-dlp/issues/9507)) by [pzhlkj6612](https://github.com/pzhlkj6612), [vvto33](https://github.com/vvto33) +- **twitter**: spaces: [Allow extraction when not logged in](https://github.com/yt-dlp/yt-dlp/commit/679c68240a26481ea7c07cc0c014745631ea8481) ([#11289](https://github.com/yt-dlp/yt-dlp/issues/11289)) by [rubyevadestaxes](https://github.com/rubyevadestaxes) +- **weverse**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5310fa87f6cb7f66bf42e2520878952fbf6b1652) ([#11215](https://github.com/yt-dlp/yt-dlp/issues/11215)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `comment_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4) ([#11274](https://github.com/yt-dlp/yt-dlp/issues/11274)) by [bashonly](https://github.com/bashonly) + - [Remove broken `android_producer` client](https://github.com/yt-dlp/yt-dlp/commit/fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by 
[bashonly](https://github.com/bashonly) + - [Remove broken age-restriction workaround](https://github.com/yt-dlp/yt-dlp/commit/ec2f4bf0823a13043f98f5bd0bf6677837bf09dc) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly) + - [Support logging in with OAuth](https://github.com/yt-dlp/yt-dlp/commit/b8635c1d4779da195e71aa281f73aaad702c935e) ([#11001](https://github.com/yt-dlp/yt-dlp/issues/11001)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **build** + - [Migrate `py2exe` builds to `win_exe`](https://github.com/yt-dlp/yt-dlp/commit/a886cf3e900f4a2ec00af705f883539269545609) ([#11256](https://github.com/yt-dlp/yt-dlp/issues/11256)) by [bashonly](https://github.com/bashonly) + - [Use `macos-13` image for macOS builds](https://github.com/yt-dlp/yt-dlp/commit/64d84d75ca8c19ec06558cc7c511f5f4f7a822bc) ([#11236](https://github.com/yt-dlp/yt-dlp/issues/11236)) by [bashonly](https://github.com/bashonly) + - `make_lazy_extractors`: [Force running without plugins](https://github.com/yt-dlp/yt-dlp/commit/1a830394a21a81a3e9918f9e175abc9fbb21f089) ([#11205](https://github.com/yt-dlp/yt-dlp/issues/11205)) by [Grub4K](https://github.com/Grub4K) +- **cleanup**: Miscellaneous: [67adeb7](https://github.com/yt-dlp/yt-dlp/commit/67adeb7bab00662ba55d473e405b301abb42fe61) by [bashonly](https://github.com/bashonly), [DTrombett](https://github.com/DTrombett), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [KarboniteKream](https://github.com/KarboniteKream), [mikkovedru](https://github.com/mikkovedru), [seproDev](https://github.com/seproDev) +- **test**: [Allow running tests explicitly](https://github.com/yt-dlp/yt-dlp/commit/16eb28026a2ddf5608d0a628ef15949b8d3805a9) ([#11203](https://github.com/yt-dlp/yt-dlp/issues/11203)) by [Grub4K](https://github.com/Grub4K) + ### 2024.10.07 #### Core changes diff --git a/supportedsites.md b/supportedsites.md index e23d395fde..7b22e8c6fa 100644 --- 
a/supportedsites.md +++ b/supportedsites.md @@ -45,10 +45,6 @@ # Supported sites - **aenetworks:collection** - **aenetworks:show** - **AeonCo** - - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com - - **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story - - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams - - **afreecatv:user** - **AirTV** - **AitubeKZVideo** - **AliExpressLive** @@ -254,6 +250,7 @@ # Supported sites - **CCMA** - **CCTV**: 央视网 - **CDA**: [*cdapl*](## "netrc machine") + - **CDAFolder** - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -1046,8 +1043,8 @@ # Supported sites - **Parler**: Posts on parler.com - **parliamentlive.tv**: UK parliament videos - **Parlview**: (**Currently broken**) - - **Patreon** - - **PatreonCampaign** + - **patreon** + - **patreon:campaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), 
WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), 
WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PBSKids** - **PearVideo** @@ -1339,6 +1336,10 @@ # Supported sites - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** + - **soop**: [*afreecatv*](## "netrc machine") sooplive.co.kr + - **soop:catchstory**: [*afreecatv*](## "netrc machine") sooplive.co.kr catch story + - **soop:live**: [*afreecatv*](## "netrc machine") sooplive.co.kr livestreams + - **soop:user**: [*afreecatv*](## "netrc machine") - **soundcloud**: [*soundcloud*](## "netrc machine") - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") - **soundcloud:related**: [*soundcloud*](## "netrc machine") @@ -1778,24 +1779,24 @@ # Supported sites - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - - **youtube**: YouTube - - **youtube:clip** - - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. 
#songs - - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - - **youtube:playlist**: YouTube playlists - - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - - **youtube:search**: YouTube search; "ytsearch:" prefix - - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - - **youtube:search_url**: YouTube search URLs with sorting and filter support - - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - - **youtube:tab**: YouTube Tabs - - **youtube:user**: YouTube user videos; "ytuser:" prefix - - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - - **YoutubeLivestreamEmbed**: YouTube livestream embeds - - **YoutubeYtBe**: youtu.be + - **youtube**: [*youtube*](## "netrc machine") YouTube + - **youtube:clip**: [*youtube*](## "netrc machine") + - **youtube:favorites**: [*youtube*](## "netrc machine") YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: [*youtube*](## "netrc machine") Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:​music:search_url**: [*youtube*](## "netrc machine") YouTube music search URLs with selectable sections, e.g. 
#songs + - **youtube:notif**: [*youtube*](## "netrc machine") YouTube notifications; ":ytnotif" keyword (requires cookies) + - **youtube:playlist**: [*youtube*](## "netrc machine") YouTube playlists + - **youtube:recommended**: [*youtube*](## "netrc machine") YouTube recommended videos; ":ytrec" keyword + - **youtube:search**: [*youtube*](## "netrc machine") YouTube search; "ytsearch:" prefix + - **youtube:​search:date**: [*youtube*](## "netrc machine") YouTube search, newest videos first; "ytsearchdate:" prefix + - **youtube:search_url**: [*youtube*](## "netrc machine") YouTube search URLs with sorting and filter support + - **youtube:​shorts:pivot:audio**: [*youtube*](## "netrc machine") YouTube Shorts audio pivot (Shorts using audio of a given video) + - **youtube:subscriptions**: [*youtube*](## "netrc machine") YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) + - **youtube:tab**: [*youtube*](## "netrc machine") YouTube Tabs + - **youtube:user**: [*youtube*](## "netrc machine") YouTube user videos; "ytuser:" prefix + - **youtube:watchlater**: [*youtube*](## "netrc machine") Youtube watch later list; ":ytwatchlater" keyword (requires cookies) + - **YoutubeLivestreamEmbed**: [*youtube*](## "netrc machine") YouTube livestream embeds + - **YoutubeYtBe**: [*youtube*](## "netrc machine") youtu.be - **Zaiko** - **ZaikoETicket** - **Zapiks** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 2ad18dd196..17d7881845 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.10.07' +__version__ = '2024.10.22' -RELEASE_GIT_HEAD = '1a176d874e6772cd898ce507379ea388e96ee3f7' +RELEASE_GIT_HEAD = '67adeb7bab00662ba55d473e405b301abb42fe61' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.10.07' +_pkg_version = '2024.10.22' From 87884f15580910e4e0fe0e1db73508debc657471 Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 05:40:25 +0000 Subject: [PATCH 34/44] [build] Move optional dependencies to the `default` group (#11255) Closes #11221 Authored by: bashonly --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5439db1df2..ff5e38ff50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,10 @@ classifiers = [ "Operating System :: OS Independent", ] dynamic = ["version"] -dependencies = [ +dependencies = [] + +[project.optional-dependencies] +default = [ "brotli; implementation_name=='cpython'", "brotlicffi; implementation_name!='cpython'", "certifi", @@ -52,9 +55,6 @@ dependencies = [ "urllib3>=1.26.17,<3", "websockets>=13.0", ] - -[project.optional-dependencies] -default = [] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", From ea9e35d85fba5eab341cdcaf1eaed69b57f7e465 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:03:30 +0000 Subject: [PATCH 35/44] [cleanup] Misc (#11311) Authored by: bashonly --- .github/workflows/build.yml | 11 +++++------ devscripts/changelog_override.json | 5 +++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 64227d9740..00326416d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,7 +72,7 @@ on: default: true type: boolean windows: - description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + description: yt-dlp.exe, yt-dlp_win.zip default: true type: boolean windows32: @@ -421,10 +421,6 @@ jobs: python -m bundle.pyinstaller --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Add migration executable for py2exe - run: | - Copy-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - 
- name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | @@ -444,7 +440,6 @@ jobs: name: build-bin-${{ github.job }} path: | dist/yt-dlp.exe - dist/yt-dlp_min.exe dist/yt-dlp_win.zip compression-level: 0 @@ -531,13 +526,17 @@ jobs: lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lock 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ EOF - name: Sign checksum files diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 3d8fe53a52..3262a0e678 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -206,5 +206,10 @@ "action": "add", "when": "a886cf3e900f4a2ec00af705f883539269545609", "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). 
[Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" + }, + { + "action": "add", + "when": "87884f15580910e4e0fe0e1db73508debc657471", + "short": "[priority] **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" } ] From dd2e24446954246a2ec4d4a7e95531f52a14b351 Mon Sep 17 00:00:00 2001 From: bashonly Date: Tue, 22 Oct 2024 13:09:43 -0500 Subject: [PATCH 36/44] [build] Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds (#8638) Authored by: bashonly --- .github/workflows/build.yml | 26 ++++++++++++++++---------- yt_dlp/update.py | 10 ++-------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 00326416d8..fdca5d702e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -199,22 +199,24 @@ jobs: GITHUB_WORKFLOW: build githubToken: ${{ github.token }} # To cache image arch: ${{ matrix.architecture }} - distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + distro: ubuntu20.04 # Standalone executable should be built on minimum supported OS dockerRunArgs: --volume "${PWD}/repo:/repo" install: | # Installing Python 3.10 from the Deadsnakes repo raises errors apt update - apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip - python3.8 -m pip install -U pip setuptools wheel - # Cannot access any files from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi + apt -y install zlib1g-dev libffi-dev python3.9 python3.9-dev python3.9-distutils python3-pip \ + 
python3-secretstorage # Cannot build cryptography wheel in virtual armv7 environment + python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2' + # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage + python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \ + 'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' run: | cd repo - python3.8 devscripts/install_deps.py -o --include build - python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date - python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" - python3.8 devscripts/make_lazy_extractors.py - python3.8 -m bundle.pyinstaller + python3.9 devscripts/install_deps.py -o --include build + python3.9 devscripts/install_deps.py --include pyinstaller # Cached versions may be out of date + python3.9 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3.9 devscripts/make_lazy_extractors.py + python3.9 -m bundle.pyinstaller if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" @@ -527,16 +529,20 @@ jobs: lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ + lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ + lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 
yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b EOF - name: Sign checksum files diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 0172acfd63..3a8d78de41 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -158,13 +158,6 @@ def _get_system_deprecation(): return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG) return None - # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9 - elif variant in ('linux_aarch64_exe', 'linux_armv7l_exe'): - libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2]) - if libc_ver < (2, 31): - return EXE_MSG_TMPL.format('system glibc version < 2.31', 'pull/8638', STOP_MSG) - return None - return f'Support for Python version {major}.{minor} has been deprecated. 
{PYTHON_MSG}' @@ -357,7 +350,8 @@ def _process_update_spec(self, lockfile: str, resolved_tag: str): continue self._report_error( - f'yt-dlp cannot be updated to {resolved_tag} since you are on an older Python version', True) + f'yt-dlp cannot be updated to {resolved_tag} since you are on an older Python version ' + 'or your operating system is not compatible with the requested build', True) return None return resolved_tag From d784464399b600ba9516bbcec6286f11d68974dd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:33:50 +0000 Subject: [PATCH 37/44] Remove Python 3.8 support (#11321) Closes #10086 Authored by: bashonly --- .github/workflows/build.yml | 22 +++++--- CONTRIBUTING.md | 20 ++++--- README.md | 10 ++-- devscripts/make_issue_template.py | 15 +++--- pyproject.toml | 3 +- setup.cfg | 2 +- test/test_update.py | 89 ++++++++++++++++++++++++------- yt_dlp/__init__.py | 4 +- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/compat/functools.py | 5 -- yt_dlp/extractor/pornbox.py | 3 +- yt_dlp/plugins.py | 2 +- yt_dlp/update.py | 16 +----- yt_dlp/utils/_utils.py | 2 +- 14 files changed, 120 insertions(+), 75 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdca5d702e..d062d7720d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -405,8 +405,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - with: # 3.8 is used for Win7 support - python-version: "3.8" + with: + python-version: "3.10" - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build @@ -454,7 +454,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.10" architecture: "x86" - name: Install Requirements run: | @@ -529,20 +529,28 @@ jobs: lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ - lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ - lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ - lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ - lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-master-builds 
2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) EOF - name: Sign checksum files diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f1646e5952..fd7b0f1210 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,14 +37,18 @@ # OPENING AN ISSUE **Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` $ yt-dlp -vU -[debug] Command-line config: ['-v', 'demo.com'] -[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 -[debug] yt-dlp version 2021.09.25 (zip) -[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 -[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 +[debug] Command-line config: ['-vU', 'https://www.example.com/'] +[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 +[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) +[debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) +[debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 +[debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} -Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 -yt-dlp is up to date (2021.09.25) +[debug] Request Handlers: urllib, requests, websockets, curl_cffi +[debug] Loaded 1838 extractors +[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest +Latest version: nightly@... 
from yt-dlp/yt-dlp-nightly-builds +yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) ... ``` **Do not post screenshots of verbose logs; only plain text is acceptable.** @@ -268,7 +272,7 @@ ## Adding support for a new site You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.8 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/README.md b/README.md index 46fff07df2..05b8e2b868 100644 --- a/README.md +++ b/README.md @@ -98,14 +98,14 @@ #### Recommended File|Description :---|:--- [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. 
Needs Python (recommended for **Linux/BSD**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win8+) standalone x64 binary (recommended for **Windows**) [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) #### Alternatives File|Description :---|:--- -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win8+) standalone x86 (32-bit) binary [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary @@ -172,7 +172,7 @@ # To install nightly with pip: ``` ## DEPENDENCIES -Python versions 3.8+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly.