Add test. Clear dead VRT extractors

This commit is contained in:
bergoid 2022-09-15 23:55:07 +02:00
parent 0e5d60d206
commit f0075841e8
4 changed files with 159 additions and 507 deletions

View file

@ -246,12 +246,6 @@
from .canalalpha import CanalAlphaIE from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .canvas import (
CanvasIE,
CanvasEenIE,
VrtNUIE,
DagelijkseKostIE,
)
from .carambatv import ( from .carambatv import (
CarambaTVIE, CarambaTVIE,
CarambaTVPageIE, CarambaTVPageIE,
@ -786,7 +780,6 @@
from .karrierevideos import KarriereVideosIE from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .ketnet import KetnetIE
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,

View file

@ -1,367 +0,0 @@
import json
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
clean_html,
float_or_none,
get_element_by_class,
str_or_none,
strip_or_none,
url_or_none,
urlencode_postdata
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https://media-services-public\.vrt\.be/media-aggregator/v2/media-items/(?P<video_id>.+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'mp4',
'title': 'Nachtwacht: De Greystook',
'description': 'Nachtwacht: De Greystook',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.02,
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
_GEO_BYPASS = False
_HLS_ENTRY_PROTOCOLS_MAP = {
'HLS': 'm3u8_native',
'HLS_AES': 'm3u8_native',
}
_REST_API_BASE_TOKEN = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
_REST_API_BASE_VIDEO = 'https://media-services-public.vrt.be/media-aggregator/v2'
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('video_id')
data = None
vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
video_id, note='refreshtoken: Retrieve vrtnutoken',
errnote='refreshtoken failed')['vrtnutoken']
headers = self.geo_verification_headers()
headers.update({'Content-Type': 'application/json; charset=utf-8'})
vrtPlayerToken = self._download_json(
'%s/tokens' % self._REST_API_BASE_TOKEN, video_id,
'Downloading token', headers=headers, data=json.dumps({
'identityToken': vrtnutoken
}).encode('utf-8'))['vrtPlayerToken']
data = self._download_json(
'%s/media-items/%s' % (self._REST_API_BASE_VIDEO, video_id),
video_id, 'Downloading video JSON', query={
'vrtPlayerToken': vrtPlayerToken,
'client': 'vrtnu-web@PROD',
}, expected_status=400)
if 'title' not in data:
code = data.get('code')
if code == 'AUTHENTICATION_REQUIRED':
self.raise_login_required()
elif code == 'INVALID_LOCATION':
self.raise_geo_restricted(countries=['BE'])
raise ExtractorError(data.get('message') or code, expected=True)
# Note: The title may be an empty string
title = data['title'] or f'{video_id}'
description = data.get('description')
formats = []
subtitles = {}
for target in data['targetUrls']:
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
m3u8_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
fmts, subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HSS':
fmts, subs = self._extract_ism_formats_and_subtitles(
format_url, video_id, ism_id='mss', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
},
'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
'info_dict': {
'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
'display_id': 'pieter-0167',
'ext': 'mp4',
'title': 'Pieter 0167',
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2553.08,
'subtitles': {
'nl': [{
'ext': 'vtt',
}],
},
},
'params': {
'skip_download': True,
},
'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
'info_dict': {
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
'display_id': 'emma-pakt-thilly-aan',
'ext': 'mp4',
'title': 'Emma pakt Thilly aan',
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 118.24,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
site_id, display_id = mobj.group('site_id'), mobj.group('id')
webpage = self._download_webpage(url, display_id)
title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None))
video_id = self._html_search_regex(
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?P<show>.+)/(?P<year>[0-9]{4})/(?P<id>.+)/?'
_TESTS = [{
# Available via old API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
'info_dict': {
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'mp4',
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'series': 'Postbus X',
'season': 'Seizoen 1989',
'season_number': 1989,
'episode': 'De zwarte weduwe',
'episode_number': 1,
'timestamp': 1595822400,
'upload_date': '20200727',
},
'skip': 'This video is only available for registered users',
'expected_warnings': ['is not a supported codec'],
}, {
# Only available via new API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
'info_dict': {
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
'ext': 'mp4',
'title': 'Aflevering 5',
'description': 'Wie valt door de mand tijdens een missie?',
'duration': 2967.06,
'season': 'Season 1',
'season_number': 1,
'episode_number': 5,
},
'skip': 'This video is only available for registered users',
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _perform_login(self, username, password):
auth_info = self._gigya_login({
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
})
if auth_info.get('errorDetails'):
raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
post_data = {
'UID': auth_info['UID'],
'UIDSignature': auth_info['UIDSignature'],
'signatureTimestamp': auth_info['signatureTimestamp'],
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
}
self._request_webpage(
'https://login.vrt.be/perform_login',
None, note='Performing login', errnote='perform login failed',
headers={}, query={
'client_id': 'vrtnu-site'
}, data=urlencode_postdata(post_data))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
episode_data = self._download_json(f'{url.strip("/")}.model.json', display_id,
'Downloading asset JSON', 'Unable to download asset JSON')
details = episode_data.get('details')
actions = details.get('actions')
episode_publication_id = actions[2].get('episodePublicationId')
episode_video_id = actions[2].get('episodeVideoId')
video_id = f'{episode_publication_id}${episode_video_id}'
return {
'_type': 'url_transparent',
'url': 'https://media-services-public.vrt.be/media-aggregator/v2/media-items/%s' % video_id,
'ie_key': 'Canvas',
'id': video_id,
'display_id': display_id
}
class DagelijkseKostIE(InfoExtractor):
IE_DESC = 'dagelijksekost.een.be'
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
'md5': '30bfffc323009a3e5f689bef6efa2365',
'info_dict': {
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
'display_id': 'hachis-parmentier-met-witloof',
'ext': 'mp4',
'title': 'Hachis parmentier met witloof',
'description': 'md5:9960478392d87f63567b5b117688cdc5',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 283.02,
},
'expected_warnings': ['is not a supported codec'],
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = strip_or_none(get_element_by_class(
'dish-metadata__title', webpage
) or self._html_search_meta(
'twitter:title', webpage))
description = clean_html(get_element_by_class(
'dish-description', webpage)
) or self._html_search_meta(
('description', 'twitter:description', 'og:description'),
webpage)
video_id = self._html_search_regex(
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
}

View file

@ -1,70 +0,0 @@
from .canvas import CanvasIE
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
parse_iso8601,
)
class KetnetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{
'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
'info_dict': {
'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
'ext': 'mp4',
'title': 'Nachtwacht - Reeks 3: Aflevering 1',
'description': 'De Nachtwacht krijgt te maken met een parasiet',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.02,
'timestamp': 1609225200,
'upload_date': '20201229',
'series': 'Nachtwacht',
'season': 'Reeks 3',
'episode': 'De Greystook',
'episode_number': 1,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
video = self._download_json(
'https://senior-bff.ketnet.be/graphql', display_id, query={
'query': '''{
video(id: "content/ketnet/nl/%s.model.json") {
description
episodeNr
imageUrl
mediaReference
programTitle
publicationDate
seasonTitle
subtitleVideodetail
titleVideodetail
}
}''' % display_id,
})['data']['video']
mz_id = compat_urllib_parse_unquote(video['mediaReference'])
return {
'_type': 'url_transparent',
'id': mz_id,
'title': video['titleVideodetail'],
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
'thumbnail': video.get('imageUrl'),
'description': video.get('description'),
'timestamp': parse_iso8601(video.get('publicationDate')),
'series': video.get('programTitle'),
'season': video.get('seasonTitle'),
'episode': video.get('subtitleVideodetail'),
'episode_number': int_or_none(video.get('episodeNr')),
'ie_key': CanvasIE.ie_key(),
}

View file

@ -1,82 +1,178 @@
from .common import InfoExtractor import json
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
extract_attributes, ExtractorError,
float_or_none, float_or_none,
get_element_by_class, str_or_none,
strip_or_none, url_or_none,
unified_timestamp, urlencode_postdata
) )
class VRTIE(InfoExtractor): class VRTIE(GigyaBaseIE):
IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza' IE_DESC = 'VRT'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?P<id>.+)/?'
_TESTS = [{ _TESTS = [{
'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/', 'url': 'https://www.vrt.be/vrtnu/a-z/heizel-1985/trailer/heizel-1985-trailer/',
'md5': 'e1663accf5cf13f375f3cd0d10476669',
'info_dict': { 'info_dict': {
'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd', 'id': 'pbs-pub-e1d6e4ec-cbf4-451e-9e87-d835bb65cd28$vid-2ad45eb6-9bc8-40d4-ad72-5f25c0f59d75',
'ext': 'mp4', 'title': 'Trailer \'Heizel 1985\'',
'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand', 'thumbnail': 'https://images.vrt.be/orig/2022/09/07/6e44ce6f-2eb3-11ed-b07d-02b7b76bf47f.jpg',
'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.', 'duration': 41.05
'timestamp': 1557924660,
'upload_date': '20190515',
'duration': 31.2,
}, },
}, {
'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
'md5': '910bba927566e9ab992278f647eb4b75',
'info_dict': {
'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
'ext': 'mp4',
'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
'timestamp': 1557923760,
'upload_date': '20190515',
'duration': 115.17,
},
}, {
'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
'only_matching': True,
}, {
'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
'only_matching': True,
}] }]
_CLIENT_MAP = { _NETRC_MACHINE = 'vrtnu'
'vrt.be/vrtnws': 'vrtnieuws', _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
'sporza.be': 'sporza', _CONTEXT_ID = 'R3595707040'
_REST_API_BASE_TOKEN = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
_REST_API_BASE_VIDEO = 'https://media-services-public.vrt.be/media-aggregator/v2'
_HLS_ENTRY_PROTOCOLS_MAP = {
'HLS': 'm3u8_native',
'HLS_AES': 'm3u8_native',
} }
_authenticated = False
def _perform_login(self, username, password):
auth_info = self._gigya_login({
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
})
if auth_info.get('errorDetails'):
raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
post_data = {
'UID': auth_info['UID'],
'UIDSignature': auth_info['UIDSignature'],
'signatureTimestamp': auth_info['signatureTimestamp'],
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
}
self._request_webpage(
'https://login.vrt.be/perform_login',
None, note='Performing login', errnote='perform login failed',
headers={}, query={
'client_id': 'vrtnu-site'
}, data=urlencode_postdata(post_data))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
self._authenticated = True
def _real_extract(self, url): def _real_extract(self, url):
site, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
attrs = extract_attributes(self._search_regex(
r'(<[^>]+class="vrtvideo( [^"]*)?"[^>]*>)', webpage, 'vrt video'))
asset_id = attrs['data-video-id'] episode_data = self._download_json(f'{url.strip("/")}.model.json', display_id,
publication_id = attrs.get('data-publication-id') 'Downloading asset JSON', 'Unable to download asset JSON')
if publication_id: details = episode_data.get('details')
asset_id = publication_id + '$' + asset_id actions = details.get('actions')
client = attrs.get('data-client-code') or self._CLIENT_MAP[site] episode_publication_id = actions[2].get('episodePublicationId')
episode_video_id = actions[2].get('episodeVideoId')
video_id = f'{episode_publication_id}${episode_video_id}'
title = strip_or_none(get_element_by_class( data = None
'vrt-title', webpage) or self._html_search_meta( vrtnutoken = ""
['og:title', 'twitter:title', 'name'], webpage))
description = self._html_search_meta( if self._authenticated:
['og:description', 'twitter:description', 'description'], webpage) vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
if description == '': video_id, note='refreshtoken: Retrieve vrtnutoken',
description = None errnote='refreshtoken failed')['vrtnutoken']
timestamp = unified_timestamp(self._html_search_meta(
'article:published_time', webpage)) headers = self.geo_verification_headers()
headers.update({'Content-Type': 'application/json; charset=utf-8'})
vrtPlayerToken = self._download_json(
'%s/tokens' % self._REST_API_BASE_TOKEN, video_id,
'Downloading token', headers=headers, data=json.dumps({
'identityToken': vrtnutoken
}).encode('utf-8'))['vrtPlayerToken']
data = self._download_json(
'%s/media-items/%s' % (self._REST_API_BASE_VIDEO, video_id),
video_id, 'Downloading video JSON', query={
'vrtPlayerToken': vrtPlayerToken,
'client': 'vrtnu-web@PROD',
}, expected_status=400)
if 'title' not in data:
code = data.get('code')
if code == 'AUTHENTICATION_REQUIRED':
self.raise_login_required()
elif code == 'INVALID_LOCATION':
self.raise_geo_restricted(countries=['BE'])
raise ExtractorError(data.get('message') or code, expected=True)
# Note: The title may be an empty string
title = data['title'] or f'{video_id}'
description = data.get('description')
formats = []
subtitles = {}
for target in data['targetUrls']:
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
m3u8_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
fmts, subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=format_type, fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HSS':
fmts, subs = self._extract_ism_formats_and_subtitles(
format_url, video_id, ism_id='mss', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return { return {
'_type': 'url_transparent', 'id': video_id,
'id': asset_id, 'display_id': video_id,
'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': attrs.get('data-posterimage'), 'formats': formats,
'timestamp': timestamp, 'duration': float_or_none(data.get('duration'), 1000),
'duration': float_or_none(attrs.get('data-duration'), 1000), 'thumbnail': data.get('posterImageUrl'),
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id), 'subtitles': subtitles,
'ie_key': 'Canvas',
} }