From b31b81d85f00601710d4fac590c3e4efb4133283 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 1 Oct 2024 00:33:17 +0200 Subject: [PATCH 1/3] [ci] Rerun failed tests (#11143) --- .github/workflows/core.yml | 2 +- .github/workflows/quick-test.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 21a64efa96..a5cb6c9707 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -59,4 +59,4 @@ jobs: continue-on-error: False run: | python3 -m yt_dlp -v || true # Print debug head - python3 ./devscripts/run_tests.py core + python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 1571d3cab4..cce7cbac1e 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -20,7 +20,7 @@ jobs: timeout-minutes: 15 run: | python3 -m yt_dlp -v || true - python3 ./devscripts/run_tests.py core + python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core check: name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" diff --git a/pyproject.toml b/pyproject.toml index f54980d576..200a9c99ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ static-analysis = [ ] test = [ "pytest~=8.1", + "pytest-rerunfailures~=14.0", ] pyinstaller = [ "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 @@ -162,7 +163,6 @@ lint-fix = "ruff check --fix {args:.}" features = ["test"] dependencies = [ "pytest-randomly~=3.15", - "pytest-rerunfailures~=14.0", "pytest-xdist[psutil]~=3.5", ] From f91645aceaf13926cf35be2c1dfef61b3aab97fb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:42:30 -0500 Subject: [PATCH 2/3] [ie/patreon] Extract all m3u8 formats for locked posts (#11138) Closes #11125 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 4489d533a6..f5cb2a5d65 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,3 +1,4 @@ +import functools import itertools import urllib.parse @@ -22,13 +23,19 @@ class PatreonBaseIE(InfoExtractor): - USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)' + @functools.cached_property + def patreon_user_agent(self): + # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection. + # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in + if self._get_cookies('https://www.patreon.com/').get('session_id'): + return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)' + return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)' def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None): if headers is None: headers = {} if 'User-Agent' not in headers: - headers['User-Agent'] = self.USER_AGENT + headers['User-Agent'] = self.patreon_user_agent if query: query.update({'json-api-version': 1.0}) @@ -111,6 +118,7 @@ class PatreonIE(PatreonBaseIE): 'comment_count': int, 'channel_is_verified': True, 'chapters': 'count:4', + 'timestamp': 1423689666, }, 'params': { 'noplaylist': True, @@ -221,6 +229,7 @@ class PatreonIE(PatreonBaseIE): 'thumbnail': r're:^https?://.+', }, 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], }, { # multiple attachments/embeds 'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977', @@ -326,8 +335,13 @@ def _real_extract(self, url): if embed_url and (urlh := self._request_webpage( embed_url, video_id, 'Checking embed URL', headers=headers, fatal=False, errnote=False, expected_status=403)): + # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need + # to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag + meta_description = clean_html(self._html_search_meta( + 'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None)) # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie - if urlh.status != 403 or VidsIoIE.suitable(embed_url): + if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page') + or VidsIoIE.suitable(embed_url)): entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) @@ -427,7 +441,7 @@ class PatreonCampaignIE(PatreonBaseIE): 'title': 'Cognitive Dissonance Podcast', 'channel_url': 'https://www.patreon.com/dissonancepod', 'id': '80642', - 'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7', + 'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*', 'channel_id': '80642', 'channel': 'Cognitive Dissonance Podcast', 'age_limit': 0, @@ -445,7 +459,7 @@ class PatreonCampaignIE(PatreonBaseIE): 'id': '4767637', 'channel_id': '4767637', 'channel_url': 'https://www.patreon.com/notjustbikes', - 'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f', + 'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*', 'age_limit': 0, 'channel': 'Not Just Bikes', 'uploader_url': 'https://www.patreon.com/notjustbikes', @@ -462,7 +476,7 @@ class PatreonCampaignIE(PatreonBaseIE): 'id': '4243769', 'channel_id': '4243769', 'channel_url': 'https://www.patreon.com/secondthought', - 'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b', + 'description': r're:(?s).*Second Thought is an educational YouTube channel.*', 'age_limit': 0, 'channel': 'Second Thought', 'uploader_url': 'https://www.patreon.com/secondthought', @@ -512,7 +526,7 @@ def _real_extract(self, url): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: - webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) + webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) campaign_id = self._search_nextjs_data( webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] From e59c82a74cda5139eb3928c75b0bd45484dbe7f0 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 1 Oct 2024 02:13:48 +0200 Subject: [PATCH 3/3] [cookies] Fix cookie load error handling (#11140) Authored by: Grub4K --- yt_dlp/YoutubeDL.py | 15 +++++++++++---- yt_dlp/__init__.py | 4 ++-- yt_dlp/cookies.py | 36 ++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9691a1ea7c..4f45d7faf6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -27,7 +27,7 @@ from .cache import Cache from .compat import urllib # isort: split from .compat import compat_os_name, urllib_req_to_req -from .cookies import LenientSimpleCookie, load_cookies +from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor @@ -1624,7 +1624,7 @@ def wrapper(self, *args, **kwargs): while True: try: return func(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): + except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except ReExtractInfo as e: if e.expected: @@ -3580,6 +3580,8 @@ def __download_wrapper(self, func): def wrapper(*args, **kwargs): try: res = func(*args, **kwargs) + except CookieLoadError: + raise except UnavailableVideoError as e: self.report_error(e) except DownloadCancelled as e: @@ -4113,8 +4115,13 @@ def proxies(self): @functools.cached_property def cookiejar(self): """Global cookiejar instance""" - return load_cookies( - self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) + try: + return load_cookies( + self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) + except CookieLoadError as error: + cause = error.__context__ + self.report_error(str(cause), tb=''.join(traceback.format_exception(cause))) + raise @property def _opener(self): diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index c2d19f94a0..f598b6c2fe 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -15,7 +15,7 @@ import traceback from .compat import compat_os_name -from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO @@ -1084,7 +1084,7 @@ def main(argv=None): _IN_CLI = True try: _exit(*variadic(_real_main(argv))) - except DownloadError: + except (CookieLoadError, DownloadError): _exit(1) except SameFileError as e: _exit(f'ERROR: {e}') diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index cff8d74a74..4a69c576be 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -34,6 +34,7 @@ from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( DownloadError, + YoutubeDLError, Popen, error_to_str, expand_path, @@ -86,24 +87,31 @@ def _create_progress_bar(logger): return printer +class CookieLoadError(YoutubeDLError): + pass + + def load_cookies(cookie_file, browser_specification, ydl): - cookie_jars = [] - if browser_specification is not None: - browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) - cookie_jars.append( - extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) + try: + cookie_jars = [] + if browser_specification is not None: + browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) + cookie_jars.append( + extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) - if cookie_file is not None: - is_filename = is_path_like(cookie_file) - if is_filename: - cookie_file = expand_path(cookie_file) + if cookie_file is not None: + is_filename = is_path_like(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) - jar = YoutubeDLCookieJar(cookie_file) - if not is_filename or os.access(cookie_file, os.R_OK): - jar.load() - cookie_jars.append(jar) + jar = YoutubeDLCookieJar(cookie_file) + if not is_filename or os.access(cookie_file, os.R_OK): + jar.load() + cookie_jars.append(jar) - return _merge_cookie_jars(cookie_jars) + return _merge_cookie_jars(cookie_jars) + except Exception: + raise CookieLoadError('failed to load cookies') def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):