mirror of https://github.com/yt-dlp/yt-dlp.git
Merge 9bd6be571b
into 351dc0bc33
This commit is contained in:
commit
95a852fa13
|
@ -1418,7 +1418,7 @@ from .patreon import (
|
|||
PatreonIE,
|
||||
PatreonCampaignIE
|
||||
)
|
||||
from .pbs import PBSIE, PBSKidsIE
|
||||
from .pbs import PBSIE, PBSKidsIE, PBSShowIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import PeekVidsIE, PlayVidsIE
|
||||
from .peertube import (
|
||||
|
|
|
@ -1,13 +1,19 @@
|
|||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
LazyList,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
clean_html,
|
||||
get_elements_html_by_class,
|
||||
get_element_html_by_class,
|
||||
extract_attributes,
|
||||
orderedSet,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
|
@ -187,9 +193,9 @@ class PBSIE(InfoExtractor):
|
|||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
|
||||
(?:%s)/(?!show)(?:(?:vir|port)alplayer|video)/(?P<id>[^/]+)(?:[?/]|$) |
|
||||
# Article with embedded player (or direct video)
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||
(?:www\.)?pbs\.org/(?!show)(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||
# Player
|
||||
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)
|
||||
)
|
||||
|
@ -198,6 +204,20 @@ class PBSIE(InfoExtractor):
|
|||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://watch.opb.org/video/cherry-blossoms-at-portlands-waterfront-have-a-story-2e1de0/',
|
||||
'md5': 'af5a85ffecd6371e86f050b4ce5a3636',
|
||||
'info_dict': {
|
||||
'id': 'cherry-blossoms-at-portlands-waterfront-have-a-story-2e1de0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oregon Experience - Cherry Blossoms at Portland\'s Waterfront Have a Story',
|
||||
'description': 'md5:8d15d264cb6ed954ee08c8c0dcbd43a2',
|
||||
'duration': 167,
|
||||
'upload_date': '20190225',
|
||||
'chapters': [],
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': '173dc391afd361fa72eab5d3d918968d',
|
||||
|
@ -681,8 +701,9 @@ class PBSIE(InfoExtractor):
|
|||
if alt_title:
|
||||
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title'])
|
||||
|
||||
description = info.get('description') or info.get(
|
||||
'program', {}).get('description') or description
|
||||
upload_date = upload_date or unified_strdate(info.get("air_date"))
|
||||
description = info.get('description') or info.get("long_description") or info.get(
|
||||
"short_description") or info.get('program', {}).get('description') or description
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -755,3 +776,138 @@ class PBSKidsIE(InfoExtractor):
|
|||
'upload_date': ('video_obj', 'air_date', {unified_strdate}),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
class PBSShowIE(InfoExtractor):
    """Playlist extractor for ``/show/<slug>`` pages on PBS station sites.

    The show page is downloaded once to read the playlist title, description
    and the list of season ordinals. Every season page (plus the ``specials``
    page) is then scraped lazily, and each episode found is delegated to
    ``PBSIE`` as a ``url_transparent`` entry.

    Passing any value for the ``exclude_specials`` extractor argument skips
    the ``specials`` page.
    """

    _VALID_URL = r'''(?x)https?://
        (?:www\.)?(?:%s)/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])
    ''' % '|'.join(list(zip(*PBSIE._STATIONS))[0])

    _TESTS = [
        # Full Show
        {
            'url': 'https://video.ksps.org/show/oregon-experience/',
            'info_dict': {
                'id': 'oregon-experience',
                'title': 'Oregon Experience',
                'description': 'md5:67b0184af36fcb5cc20df9974633eb90',
            },
            'playlist_mincount': 2,
            'params': {
                'skip_download': True,
            },
        },
        # Single Special
        {
            'url': 'https://video.ksps.org/show/betrayed-survivng-american-concentration-camp',
            'info_dict': {
                'id': 'betrayed-survivng-american-concentration-camp',
                'title': 'Betrayed: Surviving an American Concentration Camp',
                'description': 'md5:7e78ee497f1359c030d54d68339f31e8',
            },
            'playlist_mincount': 1,
            'params': {
                'skip_download': True,
            },
        },
        # Non-Season Episodes (uses season 1)
        {
            'url': 'https://video.ksps.org/show/a-brief-history-of-the-future/',
            'info_dict': {
                'id': 'a-brief-history-of-the-future',
                'title': 'A Brief History of the Future',
                'description': 'md5:08297c374c61361ac3f3d297b5157913',
            },
            'playlist_mincount': 1,
            'params': {
                'skip_download': True,
            },
        },
    ]

    # Opening tag of the JSON blob on the show page that lists the seasons
    _JSON_SEARCH = r'<script[^>]+id="content-strip-data" type="application/json">'
    # Not referenced by any method of this class; kept for backward compatibility
    _SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('

    @staticmethod
    def _make_url(url, playlist_id):
        """Return the canonical show URL (no trailing slash) on the host of `url`."""
        return f'https://{urllib.parse.urlparse(url).netloc}/show/{playlist_id}'

    @staticmethod
    def _extract_episode(popover_html):
        """Return the digits following ``Ep`` in the popover text, or None.

        The number is returned as a string, exactly as matched.
        """
        # clean_html may yield None for missing markup; re.search needs a str
        text = clean_html(popover_html) or ''
        match = re.search(r'Ep(\d+) ', text)
        return match[1] if match else None

    def _iterate_entries(self, url, playlist_id, season_indices):
        """Yield one ``url_transparent`` result per episode of the given seasons.

        @param url              canonical show URL without a trailing slash
        @param playlist_id      show slug; only used to label the page downloads
        @param season_indices   season ordinals to visit, in order; an ordinal
                                that is not positive selects the specials page
        """
        host = urllib.parse.urlparse(url).netloc

        for season_idx in season_indices:
            is_regular_season = season_idx > 0
            season_page = self._download_webpage(
                f'{url}/episodes/season/{season_idx}' if is_regular_season else f'{url}/specials',
                video_id=f'{playlist_id}-season-{season_idx}',
                # The specials page is probed unconditionally rather than being
                # listed in the show data, so its absence must not abort the playlist
                fatal=is_regular_season)
            if not season_page:
                continue

            episodes = [
                extract_attributes(elem)
                for elem in get_elements_html_by_class('video-summary', season_page)
            ]
            if not episodes:
                continue

            episode_numbers = [
                self._extract_episode(elem)
                for elem in get_elements_html_by_class('popover__meta-data', season_page)
            ]
            # Numbers are paired with episodes purely by position, so they are
            # only trusted when both lists have the same length
            if len(episode_numbers) != len(episodes):
                episode_numbers = [None] * len(episodes)

            for ep, episode_number in zip(episodes, episode_numbers):
                slug = ep.get('data-video-slug')
                if not slug:
                    # Without a slug there is no video URL to hand to PBSIE
                    continue

                # `season`/`episode` are title fields in yt-dlp; the numeric
                # values belong in `season_number`/`episode_number`
                metadata = {'season_number': season_idx}
                if ep.get('data-title'):
                    metadata['title'] = ep['data-title']
                if episode_number is not None:
                    metadata['episode_number'] = int(episode_number)

                yield self.url_result(
                    f'https://{host}/video/{slug}',
                    ie=PBSIE,
                    video_id=ep.get('data-cid'),
                    url_transparent=True,
                    **metadata,
                )

    def _real_extract(self, url):
        """Build the playlist result for a show page."""
        playlist_id = self._match_valid_url(url).group('presumptive_id')
        url = self._make_url(url=url, playlist_id=playlist_id)

        webpage = self._download_webpage(url, playlist_id)
        show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)

        playlist_description = clean_html(get_element_html_by_class(
            'show-hero__description--long is-hidden', webpage))

        # The title is read from an attribute of the "my list" button; tolerate
        # the button being absent instead of failing on a None element
        title_element = get_element_html_by_class(
            'show-hero__my-list btn--mylist--placeholder', webpage)
        show_metadata = extract_attributes(title_element) if title_element else {}
        playlist_title = show_metadata.get('data-gtm-label')

        # iterate seasons in reverse to get newest vids first
        season_indices = sorted(
            (
                season['ordinal'] for season in show_data['episodes_data']['seasons']
                # skips both a missing/None ordinal and ordinal 0 (specials)
                if season.get('ordinal')
            ),
            reverse=True,
        )
        if not self._configuration_arg('exclude_specials', [None])[0]:
            season_indices = [0] + season_indices

        return self.playlist_result(
            LazyList(self._iterate_entries(url, playlist_id, season_indices)),
            playlist_id=playlist_id,
            playlist_title=playlist_title,
            playlist_description=playlist_description,
        )
|
||||
|
|
Loading…
Reference in New Issue