Update to ytdl-2021.03.02

This commit is contained in:
pukkandan 2021-03-02 13:35:59 +05:30
parent b3b30a4bca
commit ec5e77c558
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
10 changed files with 501 additions and 401 deletions

View file

@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from .brightcove import BrightcoveNewIE
from ..utils import extract_attributes
class BandaiChannelIE(BrightcoveNewIE):
IE_NAME = 'bandaichannel'
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
_TESTS = [{
'url': 'https://www.b-ch.com/titles/514/001',
'md5': 'a0f2d787baa5729bed71108257f613a4',
'info_dict': {
'id': '6128044564001',
'ext': 'mp4',
'title': 'メタルファイターMIKU 第1話',
'timestamp': 1580354056,
'uploader_id': '5797077852001',
'upload_date': '20200130',
'duration': 1387.733,
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
attrs = extract_attributes(self._search_regex(
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
bc = self._download_json(
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
return self._parse_brightcove_metadata(bc, bc['id'])

View file

@ -1,193 +1,43 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re from .zdf import ZDFIE
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
xpath_text,
determine_ext,
float_or_none,
ExtractorError,
)
class DreiSatIE(InfoExtractor): class DreiSatIE(ZDFIE):
IE_NAME = '3sat' IE_NAME = '3sat'
_GEO_COUNTRIES = ['DE'] _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)' _TESTS = [{
_TESTS = [ # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
{ 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': { 'info_dict': {
'id': '45918', 'id': '141007_ab18_10wochensommer_film',
'ext': 'mp4',
'title': 'Ab 18! - 10 Wochen Sommer',
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
'duration': 2660,
'timestamp': 1608604200,
'upload_date': '20201222',
},
}, {
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
'info_dict': {
'id': '140913_sendung_schweizweit',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Waidmannsheil', 'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': 'SCHWEIZWEIT', 'timestamp': 1410623100,
'uploader_id': '100000210',
'upload_date': '20140913' 'upload_date': '20140913'
}, },
'params': { 'params': {
'skip_download': True, # m3u8 downloads 'skip_download': True,
} }
}, }, {
{ # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
'only_matching': True, 'only_matching': True,
}, }, {
] # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): 'only_matching': True,
param_groups = {} }]
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
group_id = param_group.get(self._xpath_ns(
'id', 'http://www.w3.org/XML/1998/namespace'))
params = {}
for param in param_group:
params[param.get('name')] = param.get('value')
param_groups[group_id] = params
formats = []
for video in smil.findall(self._xpath_ns('.//video', namespace)):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
group_id = video.get('paramGroup')
param_group = param_groups[group_id]
for proto in param_group['protocols'].split(','):
formats.append({
'url': '%s://%s' % (proto, param_group['host']),
'app': param_group['app'],
'play_path': src,
'ext': 'flv',
'format_id': '%s-%d' % (proto, bitrate),
'tbr': bitrate,
})
self._sort_formats(formats)
return formats
def extract_from_xml_url(self, video_id, xml_url):
doc = self._download_xml(
xml_url, video_id,
note='Downloading video info',
errnote='Failed to download video info')
status_code = xpath_text(doc, './status/statuscode')
if status_code and status_code != 'ok':
if status_code == 'notVisibleAnymore':
message = 'Video %s is not available' % video_id
else:
message = '%s returned error: %s' % (self.IE_NAME, status_code)
raise ExtractorError(message, expected=True)
title = xpath_text(doc, './/information/title', 'title', True)
urls = []
formats = []
for fnode in doc.findall('.//formitaeten/formitaet'):
video_url = xpath_text(fnode, 'url')
if not video_url or video_url in urls:
continue
urls.append(video_url)
is_available = 'http://www.metafilegenerator' not in video_url
geoloced = 'static_geoloced_online' in video_url
if not is_available or geoloced:
continue
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = determine_ext(video_url, None) or format_m.group('container')
if ext == 'meta':
continue
elif ext == 'smil':
formats.extend(self._extract_smil_formats(
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
# https://github.com/ytdl-org/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False))
else:
quality = xpath_text(fnode, './quality')
if quality:
format_id += '-' + quality
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
tbr = int_or_none(self._search_regex(
r'_(\d+)k', video_url, 'bitrate', None))
if tbr and vbr and not abr:
abr = tbr - vbr
formats.append({
'format_id': format_id,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int_or_none(xpath_text(fnode, './width')),
'height': int_or_none(xpath_text(fnode, './height')),
'filesize': int_or_none(xpath_text(fnode, './filesize')),
'protocol': format_m.group('proto').lower(),
})
geolocation = xpath_text(doc, './/details/geolocation')
if not formats and geolocation and geolocation != 'none':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
thumbnails = []
for node in doc.findall('.//teaserimages/teaserimage'):
thumbnail_url = node.text
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
thumbnail_key = node.get('key')
if thumbnail_key:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
return {
'id': video_id,
'title': title,
'description': xpath_text(doc, './/information/detail'),
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
'thumbnails': thumbnails,
'uploader': xpath_text(doc, './/details/originChannelTitle'),
'uploader_id': xpath_text(doc, './/details/originChannelId'),
'upload_date': upload_date,
'formats': formats,
}
def _real_extract(self, url):
video_id = self._match_id(url)
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
return self.extract_from_xml_url(video_id, details_url)

View file

@ -103,6 +103,7 @@
) )
from .azmedien import AZMedienIE from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bandaichannel import BandaiChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,

View file

@ -1,52 +1,128 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor import re
from ..utils import ExtractorError
from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..compat import compat_str
from ..utils import (
int_or_none,
merge_dicts,
unified_timestamp,
xpath_text,
)
class PhoenixIE(InfoExtractor): class PhoenixIE(ZDFBaseIE):
IE_NAME = 'phoenix.de' IE_NAME = 'phoenix.de'
_VALID_URL = r'''https?://(?:www\.)?phoenix.de/\D+(?P<id>\d+)\.html''' _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_TESTS = [ _TESTS = [{
{ # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
'url': 'https://www.phoenix.de/sendungen/dokumentationen/unsere-welt-in-zukunft---stadt-a-1283620.html', 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
'md5': '5e765e838aa3531c745a4f5b249ee3e3', 'md5': '34ec321e7eb34231fd88616c65c92db0',
'info_dict': { 'info_dict': {
'id': '0OB4HFc43Ns', 'id': '210222_phx_nachgehakt_corona_protest',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Unsere Welt in Zukunft - Stadt', 'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:9bfb6fd498814538f953b2dcad7ce044', 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'upload_date': '20190912', 'duration': 1691,
'timestamp': 1613906100,
'upload_date': '20210221',
'uploader': 'Phoenix',
'channel': 'corona nachgehakt',
},
}, {
# Youtube embed
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
'info_dict': {
'id': 'hMQtqFYjomk',
'ext': 'mp4',
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
'duration': 3509,
'upload_date': '20201219',
'uploader': 'phoenix', 'uploader': 'phoenix',
'uploader_id': 'phoenix', 'uploader_id': 'phoenix',
}
}, },
{ 'params': {
'url': 'https://www.phoenix.de/drohnenangriffe-in-saudi-arabien-a-1286995.html?ref=aktuelles', 'skip_download': True,
},
}, {
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
'only_matching': True, 'only_matching': True,
}, }, {
# an older page: https://www.phoenix.de/sendungen/gespraeche/phoenix-persoenlich/im-dialog-a-177727.html # no media
# seems to not have an embedded video, even though it's uploaded on youtube: https://www.youtube.com/watch?v=4GxnoUHvOkM 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
] 'only_matching': True,
}, {
def extract_from_json_api(self, video_id, api_url): # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
doc = self._download_json( 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
api_url, video_id, 'only_matching': True,
note="Downloading webpage metadata", }]
errnote="Failed to load webpage metadata")
for a in doc["absaetze"]:
if a["typ"] == "video-youtube":
return {
'_type': 'url_transparent',
'id': a["id"],
'title': doc["titel"],
'url': "https://www.youtube.com/watch?v=%s" % a["id"],
'ie_key': 'Youtube',
}
raise ExtractorError("No downloadable video found", expected=True)
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) article_id = self._match_id(url)
api_url = 'https://www.phoenix.de/response/id/%s' % page_id
return self.extract_from_json_api(page_id, api_url) article = self._download_json(
'https://www.phoenix.de/response/id/%s' % article_id, article_id,
'Downloading article JSON')
video = article['absaetze'][0]
title = video.get('titel') or article.get('subtitel')
if video.get('typ') == 'video-youtube':
video_id = video['id']
return self.url_result(
video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
video_title=title)
video_id = compat_str(video.get('basename') or video.get('content'))
details = self._download_xml(
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
video_id, 'Downloading details XML', query={
'ak': 'web',
'ptmd': 'true',
'id': video_id,
'profile': 'player2',
})
title = title or xpath_text(
details, './/information/title', 'title', fatal=True)
content_id = xpath_text(
details, './/video/details/basename', 'content id', fatal=True)
info = self._extract_ptmd(
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
content_id, None, url)
timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))
thumbnails = []
for node in details.findall('.//teaserimages/teaserimage'):
thumbnail_url = node.text
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
thumbnail_key = node.get('key')
if thumbnail_key:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
return merge_dicts(info, {
'id': content_id,
'title': title,
'description': xpath_text(details, './/information/detail'),
'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
'thumbnails': thumbnails,
'timestamp': timestamp,
'uploader': xpath_text(details, './/details/channel'),
'uploader_id': xpath_text(details, './/details/originChannelId'),
'channel': xpath_text(details, './/details/originChannelTitle'),
})

View file

@ -6,11 +6,12 @@
from .srgssr import SRGSSRIE from .srgssr import SRGSSRIE
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
unescapeHTML, unescapeHTML,
determine_ext, urljoin,
) )
@ -21,7 +22,7 @@ class RTSIE(SRGSSRIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': 'ff7f8450a90cf58dacb64e29707b4a8e', 'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': { 'info_dict': {
'id': '3449373', 'id': '3449373',
'display_id': 'les-enfants-terribles', 'display_id': 'les-enfants-terribles',
@ -35,6 +36,7 @@ class RTSIE(SRGSSRIE):
'thumbnail': r're:^https?://.*\.image', 'thumbnail': r're:^https?://.*\.image',
'view_count': int, 'view_count': int,
}, },
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}, },
{ {
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
@ -63,11 +65,12 @@ class RTSIE(SRGSSRIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
'skip': 'Blocked outside Switzerland', 'skip': 'Blocked outside Switzerland',
}, },
{ {
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
'md5': '1bae984fe7b1f78e94abc74e802ed99f', 'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': { 'info_dict': {
'id': '5745356', 'id': '5745356',
'display_id': 'londres-cachee-par-un-epais-smog', 'display_id': 'londres-cachee-par-un-epais-smog',
@ -81,6 +84,7 @@ class RTSIE(SRGSSRIE):
'thumbnail': r're:^https?://.*\.image', 'thumbnail': r're:^https?://.*\.image',
'view_count': int, 'view_count': int,
}, },
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}, },
{ {
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
@ -160,7 +164,7 @@ def download_json(internal_id):
media_type = 'video' if 'video' in all_info else 'audio' media_type = 'video' if 'video' in all_info else 'audio'
# check for errors # check for errors
self.get_media_data('rts', media_type, media_id) self._get_media_data('rts', media_type, media_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
@ -194,6 +198,7 @@ def extract_bitrate(url):
'tbr': extract_bitrate(format_url), 'tbr': extract_bitrate(format_url),
}) })
download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
for media in info.get('media', []): for media in info.get('media', []):
media_url = media.get('url') media_url = media.get('url')
if not media_url or re.match(r'https?://', media_url): if not media_url or re.match(r'https?://', media_url):
@ -205,7 +210,7 @@ def extract_bitrate(url):
format_id += '-%dk' % rate format_id += '-%dk' % rate
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'url': 'http://download-video.rts.ch/' + media_url, 'url': urljoin(download_base, media_url),
'tbr': rate or extract_bitrate(media_url), 'tbr': rate or extract_bitrate(media_url),
}) })

View file

@ -4,16 +4,32 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none,
parse_iso8601, parse_iso8601,
qualities, qualities,
try_get,
) )
class SRGSSRIE(InfoExtractor): class SRGSSRIE(InfoExtractor):
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' _VALID_URL = r'''(?x)
(?:
https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
srgssr
):
(?P<bu>
srf|rts|rsi|rtr|swi
):(?:[^:]+:)?
(?P<type>
video|audio
):
(?P<id>
[0-9a-f\-]{36}|\d+
)
'''
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['CH'] _GEO_COUNTRIES = ['CH']
@ -25,25 +41,39 @@ class SRGSSRIE(InfoExtractor):
'LEGAL': 'The video cannot be transmitted for legal reasons.', 'LEGAL': 'The video cannot be transmitted for legal reasons.',
'STARTDATE': 'This video is not yet available. Please try again later.', 'STARTDATE': 'This video is not yet available. Please try again later.',
} }
_DEFAULT_LANGUAGE_CODES = {
'srf': 'de',
'rts': 'fr',
'rsi': 'it',
'rtr': 'rm',
'swi': 'en',
}
def _get_tokenized_src(self, url, video_id, format_id): def _get_tokenized_src(self, url, video_id, format_id):
sp = compat_urllib_parse_urlparse(url).path.split('/')
token = self._download_json( token = self._download_json(
'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]), 'http://tp.srgssr.ch/akahd/token?acl=*',
video_id, 'Downloading %s token' % format_id, fatal=False) or {} video_id, 'Downloading %s token' % format_id, fatal=False) or {}
auth_params = token.get('token', {}).get('authparams') auth_params = try_get(token, lambda x: x['token']['authparams'])
if auth_params: if auth_params:
url += '?' + auth_params url += ('?' if '?' not in url else '&') + auth_params
return url return url
def get_media_data(self, bu, media_type, media_id): def _get_media_data(self, bu, media_type, media_id):
media_data = self._download_json( query = {'onlyChapters': True} if media_type == 'video' else {}
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id), full_media_data = self._download_json(
media_id)[media_type.capitalize()] 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
% (bu, media_type, media_id),
media_id, query=query)['chapterList']
try:
media_data = next(
x for x in full_media_data if x.get('id') == media_id)
except StopIteration:
raise ExtractorError('No media information found')
if media_data.get('block') and media_data['block'] in self._ERRORS: block_reason = media_data.get('blockReason')
message = self._ERRORS[media_data['block']] if block_reason and block_reason in self._ERRORS:
if media_data['block'] == 'GEOBLOCK': message = self._ERRORS[block_reason]
if block_reason == 'GEOBLOCK':
self.raise_geo_restricted( self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES) msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError( raise ExtractorError(
@ -53,53 +83,75 @@ def get_media_data(self, bu, media_type, media_id):
def _real_extract(self, url): def _real_extract(self, url):
bu, media_type, media_id = re.match(self._VALID_URL, url).groups() bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
media_data = self._get_media_data(bu, media_type, media_id)
title = media_data['title']
media_data = self.get_media_data(bu, media_type, media_id)
metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
title = metadata['title']
description = metadata.get('description')
created_date = media_data.get('createdDate') or metadata.get('createdDate')
timestamp = parse_iso8601(created_date)
thumbnails = [{
'id': image.get('id'),
'url': image['url'],
} for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])]
preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD'])
formats = [] formats = []
for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []): q = qualities(['SD', 'HD'])
protocol = source.get('@protocol') for source in (media_data.get('resourceList') or []):
for asset in source['url']: format_url = source.get('url')
asset_url = asset['text'] if not format_url:
quality = asset['@quality'] continue
format_id = '%s-%s' % (protocol, quality) protocol = source.get('protocol')
if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'): quality = source.get('quality')
asset_url = self._get_tokenized_src(asset_url, media_id, format_id) format_id = []
if protocol.startswith('HTTP-HDS'): for e in (protocol, source.get('encoding'), quality):
formats.extend(self._extract_f4m_formats( if e:
asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0', format_id.append(e)
media_id, f4m_id=format_id, fatal=False)) format_id = '-'.join(format_id)
elif protocol.startswith('HTTP-HLS'):
if protocol in ('HDS', 'HLS'):
if source.get('tokenType') == 'AKAMAI':
format_url = self._get_tokenized_src(
format_url, media_id, format_id)
formats.extend(self._extract_akamai_formats(
format_url, media_id))
elif protocol == 'HLS':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
asset_url, media_id, 'mp4', 'm3u8_native', format_url, media_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False)) m3u8_id=format_id, fatal=False))
else: elif protocol in ('HTTP', 'HTTPS'):
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'url': asset_url, 'url': format_url,
'quality': preference(quality), 'quality': q(quality),
'ext': 'flv' if protocol == 'RTMP' else None, })
# This is needed because for audio medias the podcast url is usually
# always included, even if is only an audio segment and not the
# whole episode.
if int_or_none(media_data.get('position')) == 0:
for p in ('S', 'H'):
podcast_url = media_data.get('podcast%sdUrl' % p)
if not podcast_url:
continue
quality = p + 'D'
formats.append({
'format_id': 'PODCAST-' + quality,
'url': podcast_url,
'quality': q(quality),
}) })
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
if media_type == 'video':
for sub in (media_data.get('subtitleList') or []):
sub_url = sub.get('url')
if not sub_url:
continue
lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu]
subtitles.setdefault(lang, []).append({
'url': sub_url,
})
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'description': description, 'description': media_data.get('description'),
'timestamp': timestamp, 'timestamp': parse_iso8601(media_data.get('date')),
'thumbnails': thumbnails, 'thumbnail': media_data.get('imageUrl'),
'duration': float_or_none(media_data.get('duration'), 1000),
'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }
@ -119,26 +171,17 @@ class SRGSSRPlayIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'md5': 'da6b5b3ac9fa4761a942331cef20fcb3', 'md5': '6db2226ba97f62ad42ce09783680046c',
'info_dict': { 'info_dict': {
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20130701', 'upload_date': '20130701',
'title': 'Snowden beantragt Asyl in Russland', 'title': 'Snowden beantragt Asyl in Russland',
'timestamp': 1372713995, 'timestamp': 1372708215,
} 'duration': 113.827,
}, { 'thumbnail': r're:^https?://.*1383719781\.png$',
# No Speichern (Save) button
'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
'md5': '0a274ce38fda48c53c01890651985bc6',
'info_dict': {
'id': '677f5829-e473-4823-ac83-a1087fe97faa',
'ext': 'flv',
'upload_date': '20130710',
'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
'description': 'md5:88604432b60d5a38787f152dec89cd56',
'timestamp': 1373493600,
}, },
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc', 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
'info_dict': { 'info_dict': {
@ -146,7 +189,8 @@ class SRGSSRPlayIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'upload_date': '20151013', 'upload_date': '20151013',
'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem', 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
'timestamp': 1444750398, 'timestamp': 1444709160,
'duration': 336.816,
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -159,19 +203,32 @@ class SRGSSRPlayIE(InfoExtractor):
'id': '6348260', 'id': '6348260',
'display_id': '6348260', 'display_id': '6348260',
'ext': 'mp4', 'ext': 'mp4',
'duration': 1796, 'duration': 1796.76,
'title': 'Le 19h30', 'title': 'Le 19h30',
'description': '',
'uploader': '19h30',
'upload_date': '20141201', 'upload_date': '20141201',
'timestamp': 1417458600, 'timestamp': 1417458600,
'thumbnail': r're:^https?://.*\.image', 'thumbnail': r're:^https?://.*\.image',
'view_count': int,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} }
}, {
'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
'info_dict': {
'id': '42960270',
'ext': 'mp4',
'title': 'Why people were against tax reforms',
'description': 'md5:7ac442c558e9630e947427469c4b824d',
'duration': 94.0,
'upload_date': '20170215',
'timestamp': 1487173560,
'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
'subtitles': 'count:9',
},
'params': {
'skip_download': True,
}
}, { }, {
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
'only_matching': True, 'only_matching': True,
@ -181,6 +238,10 @@ class SRGSSRPlayIE(InfoExtractor):
}, { }, {
'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260', 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
'only_matching': True, 'only_matching': True,
}, {
# audio segment, has podcastSdUrl of the full episode
'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -188,5 +249,4 @@ def _real_extract(self, url):
bu = mobj.group('bu') bu = mobj.group('bu')
media_type = mobj.group('type') or mobj.group('type_2') media_type = mobj.group('type') or mobj.group('type_2')
media_id = mobj.group('id') media_id = mobj.group('id')
# other info can be extracted from url + '&layout=json'
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR') return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')

View file

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none
class StretchInternetIE(InfoExtractor): class StretchInternetIE(InfoExtractor):
@ -11,22 +10,28 @@ class StretchInternetIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '573272', 'id': '573272',
'ext': 'mp4', 'ext': 'mp4',
'title': 'University of Mary Wrestling vs. Upper Iowa', 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
'timestamp': 1575668361, # 'timestamp': 1575668361,
'upload_date': '20191206', # 'upload_date': '20191206',
'uploader_id': '99997',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
media_url = self._download_json(
'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
video_id)[0]['media'][0]['url']
event = self._download_json( event = self._download_json(
'https://api.stretchinternet.com/trinity/event/tcg/' + video_id, 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
video_id)[0] video_id, query={'eventID': video_id, 'token': 'asdf'})['event']
return { return {
'id': video_id, 'id': video_id,
'title': event['title'], 'title': event['title'],
'timestamp': int_or_none(event.get('dateCreated'), 1000), # TODO: parse US timezone abbreviations
'url': 'https://' + event['media'][0]['url'], # 'timestamp': event.get('dateTimeString'),
'url': 'https://' + media_url,
'uploader_id': event.get('ownerID'),
} }

View file

@ -21,6 +21,11 @@ class URPlayIE(InfoExtractor):
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
'timestamp': 1513292400, 'timestamp': 1513292400,
'upload_date': '20171214', 'upload_date': '20171214',
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
'duration': 2269,
'categories': ['Kultur & historia'],
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
}, },
}, { }, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
@ -31,6 +36,10 @@ class URPlayIE(InfoExtractor):
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
'timestamp': 1440086400, 'timestamp': 1440086400,
'upload_date': '20150820', 'upload_date': '20150820',
'series': 'Tripp, Trapp, Träd',
'duration': 865,
'tags': ['Sova'],
'episode': 'Sovkudde',
}, },
}, { }, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
@ -41,9 +50,11 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = url.replace('skola.se/Produkter', 'play.se/program') url = url.replace('skola.se/Produkter', 'play.se/program')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._html_search_regex( vid = int(video_id)
accessible_episodes = self._parse_json(self._html_search_regex(
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"', r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
webpage, 'urplayer data'), video_id)['accessibleEpisodes'][0] webpage, 'urplayer data'), video_id)['accessibleEpisodes']
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
episode = urplayer_data['title'] episode = urplayer_data['title']
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']

View file

@ -75,12 +75,15 @@ def _real_initialize(self):
'https://www.vvvvid.it/user/login', 'https://www.vvvvid.it/user/login',
None, headers=self.geo_verification_headers())['data']['conn_id'] None, headers=self.geo_verification_headers())['data']['conn_id']
def _download_info(self, show_id, path, video_id, fatal=True): def _download_info(self, show_id, path, video_id, fatal=True, query=None):
q = {
'conn_id': self._conn_id,
}
if query:
q.update(query)
response = self._download_json( response = self._download_json(
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path), 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
video_id, headers=self.geo_verification_headers(), query={ video_id, headers=self.geo_verification_headers(), query=q, fatal=fatal)
'conn_id': self._conn_id,
}, fatal=fatal)
if not (response or fatal): if not (response or fatal):
return return
if response.get('result') == 'error': if response.get('result') == 'error':
@ -98,7 +101,8 @@ def _real_extract(self, url):
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups() show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_info( response = self._download_info(
show_id, 'season/%s' % season_id, video_id) show_id, 'season/%s' % season_id,
video_id, query={'video_id': video_id})
vid = int(video_id) vid = int(video_id)
video_data = list(filter( video_data = list(filter(
@ -247,9 +251,13 @@ def _real_extract(self, url):
show_info = self._download_info( show_info = self._download_info(
show_id, 'info/', show_title, fatal=False) show_id, 'info/', show_title, fatal=False)
if not show_title:
base_url += "/title"
entries = [] entries = []
for season in (seasons or []): for season in (seasons or []):
episodes = season.get('episodes') or [] episodes = season.get('episodes') or []
playlist_title = season.get('name') or show_info.get('title')
for episode in episodes: for episode in episodes:
if episode.get('playable') is False: if episode.get('playable') is False:
continue continue
@ -259,12 +267,13 @@ def _real_extract(self, url):
continue continue
info = self._extract_common_video_info(episode) info = self._extract_common_video_info(episode)
info.update({ info.update({
'_type': 'url', '_type': 'url_transparent',
'ie_key': VVVVIDIE.ie_key(), 'ie_key': VVVVIDIE.ie_key(),
'url': '/'.join([base_url, season_id, video_id]), 'url': '/'.join([base_url, season_id, video_id]),
'title': episode.get('title'), 'title': episode.get('title'),
'description': episode.get('description'), 'description': episode.get('description'),
'season_id': season_id, 'season_id': season_id,
'playlist_title': playlist_title,
}) })
entries.append(info) entries.append(info)

View file

@ -7,7 +7,9 @@
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none,
int_or_none, int_or_none,
merge_dicts,
NO_DEFAULT, NO_DEFAULT,
orderedSet, orderedSet,
parse_codecs, parse_codecs,
@ -21,61 +23,17 @@
class ZDFBaseIE(InfoExtractor): class ZDFBaseIE(InfoExtractor):
def _call_api(self, url, player, referrer, video_id, item):
return self._download_json(
url, video_id, 'Downloading JSON %s' % item,
headers={
'Referer': referrer,
'Api-Auth': 'Bearer %s' % player['apiToken'],
})
def _extract_player(self, webpage, video_id, fatal=True):
return self._parse_json(
self._search_regex(
r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
'player JSON', default='{}' if not fatal else NO_DEFAULT,
group='json'),
video_id)
class ZDFIE(ZDFBaseIE):
IE_NAME = "ZDF-3sat"
_VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
_TESTS = [{ def _call_api(self, url, video_id, item, api_token=None, referrer=None):
'url': 'https://www.3sat.de/wissen/wissenschaftsdoku/luxusgut-lebensraum-100.html', headers = {}
'info_dict': { if api_token:
'id': 'luxusgut-lebensraum-100', headers['Api-Auth'] = 'Bearer %s' % api_token
'ext': 'mp4', if referrer:
'title': 'Luxusgut Lebensraum', headers['Referer'] = referrer
'description': 'md5:5c09b2f45ac3bc5233d1b50fc543d061', return self._download_json(
'duration': 2601, url, video_id, 'Downloading JSON %s' % item, headers=headers)
'timestamp': 1566497700,
'upload_date': '20190822',
}
}, {
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'info_dict': {
'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'ext': 'mp4',
'title': 'Die Magie der Farben (2/2)',
'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
'duration': 2615,
'timestamp': 1465021200,
'upload_date': '20160604',
},
}, {
'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
'only_matching': True,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
'only_matching': True,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
'only_matching': True,
}]
@staticmethod @staticmethod
def _extract_subtitles(src): def _extract_subtitles(src):
@ -121,20 +79,11 @@ def _extract_format(self, video_id, formats, format_urls, meta):
}) })
formats.append(f) formats.append(f)
def _extract_entry(self, url, player, content, video_id): def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
if not ptmd_path:
ptmd_path = t[
'http://zdf.de/rels/streams/ptmd-template'].replace(
'{playerId}', 'ngplayer_2_4')
ptmd = self._call_api( ptmd = self._call_api(
urljoin(url, ptmd_path), player, url, video_id, 'metadata') ptmd_url, video_id, 'metadata', api_token, referrer)
content_id = ptmd.get('basename') or ptmd_url.split('/')[-1]
formats = [] formats = []
track_uris = set() track_uris = set()
@ -152,7 +101,7 @@ def _extract_entry(self, url, player, content, video_id):
continue continue
for track in tracks: for track in tracks:
self._extract_format( self._extract_format(
video_id, formats, track_uris, { content_id, formats, track_uris, {
'url': track.get('uri'), 'url': track.get('uri'),
'type': f.get('type'), 'type': f.get('type'),
'mimeType': f.get('mimeType'), 'mimeType': f.get('mimeType'),
@ -161,6 +110,103 @@ def _extract_entry(self, url, player, content, video_id):
}) })
self._sort_formats(formats) self._sort_formats(formats)
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
return {
'extractor_key': ZDFIE.ie_key(),
'id': content_id,
'duration': duration,
'formats': formats,
'subtitles': self._extract_subtitles(ptmd),
}
def _extract_player(self, webpage, video_id, fatal=True):
return self._parse_json(
self._search_regex(
r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
'player JSON', default='{}' if not fatal else NO_DEFAULT,
group='json'),
video_id)
class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_TESTS = [{
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
'md5': '34ec321e7eb34231fd88616c65c92db0',
'info_dict': {
'id': '210222_phx_nachgehakt_corona_protest',
'ext': 'mp4',
'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691,
'timestamp': 1613948400,
'upload_date': '20210221',
},
}, {
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
'info_dict': {
'id': '141007_ab18_10wochensommer_film',
'ext': 'mp4',
'title': 'Ab 18! - 10 Wochen Sommer',
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
'duration': 2660,
'timestamp': 1608604200,
'upload_date': '20201222',
},
}, {
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'info_dict': {
'id': '151025_magie_farben2_tex',
'ext': 'mp4',
'title': 'Die Magie der Farben (2/2)',
'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
'duration': 2615,
'timestamp': 1465021200,
'upload_date': '20160604',
},
}, {
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
'only_matching': True,
}, {
# Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
'only_matching': True,
}, {
# Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
'only_matching': True,
}, {
'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
'only_matching': True,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
'only_matching': True,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
'only_matching': True,
}]
def _extract_entry(self, url, player, content, video_id):
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
if not ptmd_path:
ptmd_path = t[
'http://zdf.de/rels/streams/ptmd-template'].replace(
'{playerId}', 'ngplayer_2_4')
info = self._extract_ptmd(
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
thumbnails = [] thumbnails = []
layouts = try_get( layouts = try_get(
content, lambda x: x['teaserImageRef']['layouts'], dict) content, lambda x: x['teaserImageRef']['layouts'], dict)
@ -181,33 +227,33 @@ def _extract_entry(self, url, player, content, video_id):
}) })
thumbnails.append(thumbnail) thumbnails.append(thumbnail)
return { return merge_dicts(info, {
'id': video_id,
'title': title, 'title': title,
'description': content.get('leadParagraph') or content.get('teasertext'), 'description': content.get('leadParagraph') or content.get('teasertext'),
'duration': int_or_none(t.get('duration')), 'duration': int_or_none(t.get('duration')),
'timestamp': unified_timestamp(content.get('editorialDate')), 'timestamp': unified_timestamp(content.get('editorialDate')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': self._extract_subtitles(ptmd), })
'formats': formats,
}
def _extract_regular(self, url, player, video_id): def _extract_regular(self, url, player, video_id):
content = self._call_api( content = self._call_api(
player['content'], player, url, video_id, 'content') player['content'], video_id, 'content', player['apiToken'], url)
return self._extract_entry(player['content'], player, content, video_id) return self._extract_entry(player['content'], player, content, video_id)
def _extract_mobile(self, video_id): def _extract_mobile(self, video_id):
document = self._download_json( video = self._download_json(
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id)['document'] video_id)
document = video['document']
title = document['titel'] title = document['titel']
content_id = document['basename']
formats = [] formats = []
format_urls = set() format_urls = set()
for f in document['formitaeten']: for f in document['formitaeten']:
self._extract_format(video_id, formats, format_urls, f) self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []
@ -225,12 +271,12 @@ def _extract_mobile(self, video_id):
}) })
return { return {
'id': video_id, 'id': content_id,
'title': title, 'title': title,
'description': document.get('beschreibung'), 'description': document.get('beschreibung'),
'duration': int_or_none(document.get('length')), 'duration': int_or_none(document.get('length')),
'timestamp': unified_timestamp(try_get( 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
document, lambda x: x['meta']['editorialDate'], compat_str)), try_get(video, lambda x: x['meta']['editorialDate'], compat_str)),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': self._extract_subtitles(document), 'subtitles': self._extract_subtitles(document),
'formats': formats, 'formats': formats,