From 1b71001149a6ede48369e66ea23e8df03724dd79 Mon Sep 17 00:00:00 2001 From: McSwindler Date: Sat, 20 Apr 2024 12:07:02 -0500 Subject: [PATCH] [watchertv] Add extractor --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/watchertv.py | 215 ++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 yt_dlp/extractor/watchertv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 42034275b..d42722be9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2306,6 +2306,10 @@ WashingtonPostArticleIE, ) from .wat import WatIE +from .watchertv import ( + WatcherTVSeasonIE, + WatcherTVIE +) from .wdr import ( WDRIE, WDRPageIE, diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py new file mode 100644 index 000000000..4d28576e5 --- /dev/null +++ b/yt_dlp/extractor/watchertv.py @@ -0,0 +1,215 @@ +import functools + +from .common import InfoExtractor +from .vimeo import VHXEmbedIE +from ..utils import ( + ExtractorError, + OnDemandPagedList, + clean_html, + extract_attributes, + get_element_by_class, + get_element_by_id, + get_elements_html_by_class, + int_or_none, + traverse_obj, + unified_strdate, + urlencode_postdata, +) + + +class WatcherTVIE(InfoExtractor): + _LOGIN_URL = 'https://www.watchertv.com/login' + _NETRC_MACHINE = 'watchertv' + + _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?:[^/]+/)*videos/(?P[^/]+)/?$' + _TESTS = [ + { + 'url': 'https://www.watchertv.com/ghost-files/season:2/videos/gf-201', + 'note': 'Episode in a series', + 'md5': '99c9aab2cb62157467b7ef5e37266e4e', + 'info_dict': { + 'id': '3129338', + 'display_id': 'gf-201', + 'ext': 'mp4', + 'title': 'The Death Row Poltergeists of Missouri State Penitentiary', + 'description': 'Where Curiosity Meets Comedy', + 'release_date': '20230825', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/92c02f39-2ed6-4b51-9e63-1a907b82e2bc.png', + 'series': 'Ghost Files', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 1, + 'episode': 'The Death Row Poltergeists of Missouri State Penitentiary', + 'duration': 3853, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.watchertv.com/road-files/season:1/videos/rf101', + 'note': 'Episode in a series (missing release_date)', + 'md5': '02f9aaafc8ad9bd1be366cf6a61a68d8', + 'info_dict': { + 'id': '3187312', + 'display_id': 'rf101', + 'ext': 'mp4', + 'title': 'Road Files: Haunted Hill House', + 'description': 'Where Curiosity Meets Comedy', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/7445f23c-a3e7-47fb-835a-d288273e2698.png', + 'series': 'Road Files', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Road Files: Haunted Hill House', + 'duration': 516, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.watchertv.com/videos/welcome-beta-users', + 'note': 'Episode not in a series', + 'md5': 'fd1db805f9adc442c38d706bba21ad03', + 'info_dict': { + 'id': '3187107', + 'display_id': 'welcome-beta-users', + 'ext': 'mp4', + 'title': 'Welcome to Watcher!', + 'description': 'Where Curiosity Meets Comedy', + 'release_date': '20240419', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/fbb90dc8-ebb0-4597-9a83-95729e234030.jpg', + 'duration': 92, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + } + ] + + def _get_authenticity_token(self, display_id): + signin_page = self._download_webpage( + self._LOGIN_URL, display_id, note='Getting authenticity token') + return self._html_search_regex( + r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', + signin_page, 'authenticity_token') + + def _login(self, display_id): + username, password = self._get_login_info() + if not username: + return True + + response = self._download_webpage( + self._LOGIN_URL, display_id, note='Logging in', fatal=False, + data=urlencode_postdata({ + 'email': username, + 'password': password, + 'authenticity_token': self._get_authenticity_token(display_id), + 'utf8': True + })) + + user_has_subscription = self._search_regex( + r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') + if user_has_subscription.lower() == 'true': + return + elif user_has_subscription.lower() == 'false': + return 'Account is not subscribed' + else: + return 'Incorrect username/password' + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = None + if self._get_cookies('https://www.watchertv.com').get('_session'): + webpage = self._download_webpage(url, display_id) + if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _TESTS = [ + { + 'url': 'https://www.watchertv.com/ghost-files/season:1', + 'note': 'Multi-season series with the season in the url', + 'playlist_count': 8, + 'info_dict': { + 'id': 'ghost-files-season-1', + 'title': 'Ghost Files - Season 1' + } + }, + { + 'url': 'https://www.watchertv.com/are-you-scared', + 'note': 'Multi-season series with the season not in the url', + 'playlist_count': 3, + 'info_dict': { + 'id': 'are-you-scared-season-1', + 'title': 'Are You Scared - Season 1' + } + }, + { + 'url': 'https://www.watchertv.com/watcher-one-offs', + 'note': 'Single-season series', + 'playlist_count': 16, + 'info_dict': { + 'id': 'watcher-one-offs-season-1', + 'title': 'Watcher One Offs - Season 1' + } + } + ] + + def _fetch_page(self, url, season_id, page): + page += 1 + webpage = self._download_webpage( + f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) + yield from [self.url_result(item_url, WatcherTVIE) for item_url in traverse_obj( + get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] + + def _real_extract(self, url): + season_id = self._match_id(url) + season_num = self._match_valid_url(url).group('season') or 1 + season_title = season_id.replace('-', ' ').title() + + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), + f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')