mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
parent
9b0b627534
commit
87f89dacdd
|
@ -4,6 +4,7 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
|
@ -375,6 +376,35 @@ class PBSIE(InfoExtractor):
|
|||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
|
||||
'info_dict': {
|
||||
'id': '3007193718',
|
||||
'ext': 'mp4',
|
||||
'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
|
||||
'description': 'md5:37efbac85e0c09b009586523ec143652',
|
||||
'duration': 6292,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
|
||||
'info_dict': {
|
||||
'id': '3011407934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stories from the Stage - Road Trip',
|
||||
'duration': 1619,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
|
@ -438,6 +468,7 @@ def _extract_webpage(self, url):
|
|||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
|
@ -472,7 +503,8 @@ def _extract_webpage(self, url):
|
|||
if not url:
|
||||
url = self._og_search_url(webpage)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mobj = re.match(
|
||||
self._VALID_URL, self._proto_relative_url(url.strip()))
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
if not display_id:
|
||||
|
@ -482,13 +514,27 @@ def _extract_webpage(self, url):
|
|||
url, display_id, note='Downloading player page',
|
||||
errnote='Could not download player page')
|
||||
video_id = self._search_regex(
|
||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||
r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
|
||||
default=None)
|
||||
if not video_id:
|
||||
video_info = self._extract_video_data(
|
||||
player_page, 'video data', display_id)
|
||||
video_id = compat_str(
|
||||
video_info.get('id') or video_info['contentID'])
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id, None, description
|
||||
|
||||
def _extract_video_data(self, string, name, video_id, fatal=True):
    """Find the embedded video-data object literal in *string* and parse it.

    Two JavaScript assignment forms are searched for, in this order:
    ``PBS.videoData = {...};`` followed by a newline, then
    ``window.videoBridge = {...};``.  The captured ``{...}`` text is run
    through ``js_to_json`` (as ``transform_source``) before JSON parsing.

    ``name`` is passed to ``_search_regex`` as the field name; ``video_id``
    and ``fatal`` are passed straight through to ``_parse_json``.
    """
    video_data_patterns = [
        r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
        r'window\.videoBridge\s*=\s*({.+?});',
    ]
    # '{}' is supplied as the search default -- presumably so that an empty
    # object is parsed when neither pattern matches (confirm against
    # InfoExtractor._search_regex).
    raw_object = self._search_regex(
        video_data_patterns, string, name, default='{}')
    return self._parse_json(
        raw_object, video_id, transform_source=js_to_json, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||
|
||||
|
@ -519,11 +565,8 @@ def extract_redirect_urls(info):
|
|||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||
display_id, 'Downloading %s page' % page, fatal=False)
|
||||
if player:
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'],
|
||||
player, '%s video data' % page, default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
video_info = self._extract_video_data(
|
||||
player, '%s video data' % page, display_id, fatal=False)
|
||||
if video_info:
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
|
|
Loading…
Reference in a new issue