diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
index b563027d6..f6160bbfd 100644
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -1,15 +1,21 @@
+import functools
 import re
+import urllib.parse
 
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     ExtractorError,
+    OnDemandPagedList,
     determine_ext,
     int_or_none,
     float_or_none,
     js_to_json,
     clean_html,
     get_elements_html_by_class,
+    get_element_html_by_class,
+    get_element_by_id,
+    extract_attributes,
     orderedSet,
     strip_jsonp,
     strip_or_none,
@@ -795,9 +801,10 @@ class PBSShowIE(InfoExtractor):
     }]
 
     _JSON_SEARCH = r']+id="content-strip-data" type="application/json">'
+    _SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
+    PAGE_SIZE = 40
     _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
-    PAGE_SIZE = 25
 
     HTML_CLASS_NAMES = {
         'channel': {
             'container': 'channel-videos-container',
@@ -816,22 +823,53 @@ class PBSShowIE(InfoExtractor):
     def _make_url(playlist_id):
         return f'https://watch.opb.org/show/{playlist_id}'
 
+    def _fetch_season_page(self, playlist_id, page_num):
+        """Yield a URL entry for each episode linked from one season page.
+
+        page_num is the season ordinal that appears in the site's
+        /episodes/season/N URLs.
+        """
+        season_url = f'{self._make_url(playlist_id)}/episodes/season/{page_num}'
+        season_page = self._download_webpage(
+            season_url, video_id=f'{playlist_id}-{page_num}')
+
+        for summary_html in get_elements_html_by_class('video-summary', season_page):
+            # NOTE(review): assumes every "video-summary" element contains a
+            # link to its episode page; confirm against the live markup
+            video_path = self._search_regex(
+                r'\bhref=(["\'])((?:(?!\1).)+)\1',
+                summary_html, 'episode url', group=2, default=None)
+            if video_path:
+                yield self.url_result(urllib.parse.urljoin(season_url, video_path))
+
+    def _entries(self, playlist_id, show_data):
+        """Lazily yield the episodes of every season, highest ordinal first."""
+        seasons = (show_data.get('episodes_data') or {}).get('seasons') or []
+        for season in sorted(seasons, key=lambda s: s.get('ordinal') or 0, reverse=True):
+            season_ordinal = season.get('ordinal') or 0
+            # Ordinal 0 is skipped, exactly as the loop this replaces did
+            if season_ordinal == 0:
+                continue
+            yield from self._fetch_season_page(playlist_id, season_ordinal)
+
     def _real_extract(self, url):
         playlist_id = self._match_valid_url(url).group('id')
-        playlist_url = self._make_url(playlist_id)
 
         webpage = self._download_webpage(self._make_url(playlist_id), playlist_id)
         show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
 
-        for show_season_metadata in sorted(show_data.get('episodes_data', {}).get('seasons', []), key=lambda x: x.get('ordinal', 0), reverse=True):
-            season_ordinal = show_season_metadata.get('ordinal', 0)
-            if season_ordinal == 0:
-                continue
-
-            season_id = f'{playlist_id}-{season_ordinal}'
-
-            season_page = self._download_webpage(f'{playlist_url}/episodes/season/{season_ordinal}', video_id=season_id)
-            season_data = get_elements_html_by_class("video-summary", season_page)
-            pass
+        # Either hero element may be missing, so guard before parsing it
+        description_html = get_element_html_by_class(
+            'show-hero__description--long is-hidden', webpage)
+        show_metadata = extract_attributes(get_element_html_by_class(
+            'show-hero__my-list btn--mylist--placeholder', webpage) or '')
+
+        return self.playlist_result(
+            self._entries(playlist_id, show_data),
+            playlist_id=playlist_id,
+            # NOTE(review): attribute name kept from the draft; confirm it is
+            # not a typo for 'data-gtm-label'
+            playlist_title=show_metadata.get('data-gtml-label'),
+            playlist_description=clean_html(description_html))
 
         return