From dd41cc4ade384e7de9961f4105c7b24c4cc2f98c Mon Sep 17 00:00:00 2001 From: McSwindler Date: Sun, 21 Apr 2024 08:51:10 -0500 Subject: [PATCH] [watchertv] update extractor to extend dropout instead of duplicating --- yt_dlp/extractor/dropout.py | 37 +++++------ yt_dlp/extractor/watchertv.py | 114 ++-------------------------------- 2 files changed, 24 insertions(+), 127 deletions(-) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 80ae6c126..b413da334 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -18,7 +18,7 @@ class DropoutIE(InfoExtractor): - _LOGIN_URL = 'https://www.dropout.tv/login' + _HOST = 'https://www.dropout.tv' _NETRC_MACHINE = 'dropout' _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' @@ -26,7 +26,7 @@ class DropoutIE(InfoExtractor): { 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', 'note': 'Episode in a series', - 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a', + 'md5': 'fc55805bac60b1ce2ffdc35fb9c51195', 'info_dict': { 'id': '738153', 'display_id': 'yes-or-no', @@ -48,22 +48,22 @@ class DropoutIE(InfoExtractor): 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] }, { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', + 'url': 'https://www.dropout.tv/ch-shorts/season:1/videos/post-apocalyptic-dane-cook', 'note': 'Episode in a series (missing release_date)', - 'md5': '712caf7c191f1c47c8f1879520c2fa5c', + 'md5': 'f260b8d7d0fdbaceae713c9196dac07f', 'info_dict': { - 'id': '320562', - 'display_id': 'episode-1', + 'id': '449042', + 'display_id': 'post-apocalyptic-dane-cook', 'ext': 'mp4', - 'title': 'The Beginning Begins', - 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg', - 'series': 'Dimension 20: Fantasy High', + 'title': 'Post-Apocalyptic Dane Cook', + 'description': 'Dane Cook is back with his all new special. Don\'t worry, it\'s not the end of the world.', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/5b0678df-d9c3-4864-b811-24db03072f4a.jpg', + 'series': 'CH Shorts', 'season_number': 1, 'season': 'Season 1', 'episode_number': 1, - 'episode': 'The Beginning Begins', - 'duration': 6838, + 'episode': 'Post-Apocalyptic Dane Cook', + 'duration': 135, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos' @@ -73,7 +73,7 @@ class DropoutIE(InfoExtractor): { 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', 'note': 'Episode not in a series', - 'md5': 'c30fa18999c5880d156339f13c953a26', + 'md5': '147e0607bd877a791665c0b7219b512c', 'info_dict': { 'id': '1915774', 'display_id': 'misfits-magic-holiday-special', @@ -93,7 +93,7 @@ class DropoutIE(InfoExtractor): def _get_authenticity_token(self, display_id): signin_page = self._download_webpage( - self._LOGIN_URL, display_id, note='Getting authenticity token') + f'{self._HOST}/login', display_id, note='Getting authenticity token') return self._html_search_regex( r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', signin_page, 'authenticity_token') @@ -104,7 +104,7 @@ def _login(self, display_id): return True response = self._download_webpage( - self._LOGIN_URL, display_id, note='Logging in', fatal=False, + f'{self._HOST}/login', display_id, note='Logging in', fatal=False, data=urlencode_postdata({ 'email': username, 'password': password, @@ -125,7 +125,7 @@ def _real_extract(self, url): display_id = self._match_id(url) webpage = None - if self._get_cookies('https://www.dropout.tv').get('_session'): + if self._get_cookies(self._HOST).get('_session'): webpage = self._download_webpage(url, display_id) if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VIDEO_IE = DropoutIE _TESTS = [ { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', @@ -211,7 +212,7 @@ def _fetch_page(self, url, season_id, page): page += 1 webpage = self._download_webpage( f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj( + yield from [self.url_result(item_url, self._VIDEO_IE) for item_url in traverse_obj( get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] def _real_extract(self, url): diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py index 4d28576e5..9079e8e48 100644 --- a/yt_dlp/extractor/watchertv.py +++ b/yt_dlp/extractor/watchertv.py @@ -1,24 +1,8 @@ -import functools - -from .common import InfoExtractor -from .vimeo import VHXEmbedIE -from ..utils import ( - ExtractorError, - OnDemandPagedList, - clean_html, - extract_attributes, - get_element_by_class, - get_element_by_id, - get_elements_html_by_class, - int_or_none, - traverse_obj, - unified_strdate, - urlencode_postdata, -) +from .dropout import DropoutIE, DropoutSeasonIE -class WatcherTVIE(InfoExtractor): - _LOGIN_URL = 'https://www.watchertv.com/login' +class WatcherTVIE(DropoutIE): + _HOST = 'https://www.watchertv.com' _NETRC_MACHINE = 'watchertv' _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?:[^/]+/)*videos/(?P[^/]+)/?$' @@ -91,83 +75,11 @@ class WatcherTVIE(InfoExtractor): } ] - def _get_authenticity_token(self, display_id): - signin_page = self._download_webpage( - self._LOGIN_URL, display_id, note='Getting authenticity token') - return self._html_search_regex( - r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', - signin_page, 'authenticity_token') - def _login(self, display_id): - username, password = self._get_login_info() - if not username: - return True - - response = self._download_webpage( - self._LOGIN_URL, display_id, note='Logging in', fatal=False, - data=urlencode_postdata({ - 'email': username, - 'password': password, - 'authenticity_token': self._get_authenticity_token(display_id), - 'utf8': True - })) - - user_has_subscription = self._search_regex( - r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') - if user_has_subscription.lower() == 'true': - return - elif user_has_subscription.lower() == 'false': - return 'Account is not subscribed' - else: - return 'Incorrect username/password' - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = None - if self._get_cookies('https://www.watchertv.com').get('_session'): - webpage = self._download_webpage(url, display_id) - if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VIDEO_IE = WatcherTVIE _TESTS = [ { 'url': 'https://www.watchertv.com/ghost-files/season:1', @@ -197,19 +109,3 @@ class WatcherTVSeasonIE(InfoExtractor): } } ] - - def _fetch_page(self, url, season_id, page): - page += 1 - webpage = self._download_webpage( - f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, WatcherTVIE) for item_url in traverse_obj( - get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] - - def _real_extract(self, url): - season_id = self._match_id(url) - season_num = self._match_valid_url(url).group('season') or 1 - season_title = season_id.replace('-', ' ').title() - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), - f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')