2023-06-02 18:29:00 +00:00
import functools
2022-09-15 21:55:07 +00:00
import json
2023-09-17 08:15:38 +00:00
from http . cookiejar import CookieJar
2023-06-02 18:29:00 +00:00
import urllib . parse
2023-09-17 08:15:38 +00:00
import urllib . request
2022-09-15 21:55:07 +00:00
from . gigya import GigyaBaseIE
2016-04-07 17:22:43 +00:00
from . . utils import (
2022-09-15 21:55:07 +00:00
ExtractorError ,
2023-06-02 18:29:00 +00:00
clean_html ,
2019-05-15 14:02:51 +00:00
extract_attributes ,
2016-04-07 17:22:43 +00:00
float_or_none ,
2019-05-15 14:02:51 +00:00
get_element_by_class ,
2023-06-02 18:29:00 +00:00
get_element_html_by_class ,
int_or_none ,
2023-09-21 20:13:24 +00:00
# join_nonempty,
# jwt_encode_hs256,
2023-06-02 18:29:00 +00:00
make_archive_id ,
parse_age_limit ,
parse_iso8601 ,
2022-09-15 21:55:07 +00:00
str_or_none ,
2019-05-15 14:02:51 +00:00
strip_or_none ,
2023-06-02 18:29:00 +00:00
traverse_obj ,
2022-09-15 21:55:07 +00:00
url_or_none ,
2023-06-02 18:29:00 +00:00
urlencode_postdata ,
2016-04-07 17:22:43 +00:00
)
2014-10-26 09:48:11 +00:00
2023-11-04 20:49:11 +00:00
def parse_year(timestamp):
    """Return the first four characters of *timestamp* as an int.

    Returns None when *timestamp* is not a string, is shorter than four
    characters, or when its first four characters are not a valid integer.
    """
    if not isinstance(timestamp, str) or len(timestamp) < 4:
        return None
    try:
        return int(timestamp[:4])
    except ValueError:
        # Not numeric (e.g. 'abcd-01-01'): treat as "no year available"
        return None
2014-10-26 09:48:11 +00:00
2023-06-02 18:29:00 +00:00
class VRTBaseIE(GigyaBaseIE):
    """Shared helpers for the VRT extractors in this module.

    Provides format/subtitle extraction from a media-aggregator response
    and the two-step "player token -> video info" API call.
    """
    _GEO_BYPASS = False

    # _PLAYER_INFO = {
    #     'platform': 'desktop',
    #     'app': {
    #         'type': 'browser',
    #         'name': 'Chrome'
    #     },
    #     'device': 'undefined (undefined)',
    #     'os': {
    #         'name': 'Windows',
    #         'version': 'x86_64'
    #     },
    #     'player': {
    #         'name': 'VRT web player',
    #         'version': '3.2.6-prod-2023-09-11T12:37:41'
    #     }
    # }

    # GraphQL document sent by VrtNUIE to fetch episode metadata
    _VIDEOPAGE_QUERY = '''query VideoPage($pageId: ID!) {
  page(id: $pageId) {
    ... on EpisodePage {
      id
      title
      seo {
        ...seoFragment
        __typename
      }
      ldjson
      episode {
        onTimeRaw
        ageRaw
        name
        episodeNumberRaw
        program {
          title
          __typename
        }
        watchAction {
          streamId
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
}
fragment seoFragment on SeoProperties {
  __typename
  title
  description
}'''

    # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
    _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
    _JWT_SIGNING_KEY = '2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae'
    # _JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38'  # -dev
    # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
    # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae

    def _extract_formats_and_subtitles(self, data, video_id):
        """Build formats and subtitles from an API response.

        @param data      Response dict; 'drm', 'targetUrls' and 'subtitleUrls' are read
        @param video_id  ID used for progress/error reporting
        @returns         (formats, subtitles) tuple
        """
        if traverse_obj(data, 'drm'):
            self.report_drm(video_id)

        formats, subtitles = [], {}
        # Only consider targets that have a valid URL and a non-empty type
        for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
            format_type = target['type'].upper()
            format_url = target['url']
            if format_type in ('HLS', 'HLS_AES'):
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif format_type == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_type, fatal=False))
            elif format_type == 'MPEG_DASH':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id=format_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif format_type == 'HSS':
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                # Unknown type: expose the URL as-is
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                })

        # Only subtitle entries of type 'CLOSED' are used; they are filed under 'nl'
        for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')):
            subtitles.setdefault('nl', []).append({'url': sub['url']})

        return formats, subtitles

    def _call_api(self, video_id, client='null', id_token=None, version='v2'):
        """Fetch the video info JSON for *video_id*.

        A player token is requested first and then passed as a query
        parameter to the videos endpoint.

        @param video_id  ID of the video to look up
        @param client    Value of the 'client' query parameter
        @param id_token  Identity token; when omitted, the value of the
                         'vrtnu-site_profile_vt' cookie for www.vrt.be is used
        @param version   API version path segment ('v1', 'v2', ...)
        @returns         Parsed JSON response of the videos endpoint
        """
        # player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
        # player_info_jwt = jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
        #     'kid': self._JWT_KEY_ID
        # }).decode()

        if not id_token:
            # The cookie is absent when the user is not logged in; do not
            # crash with AttributeError on `None.value` in that case
            cookie = self._get_cookies('https://www.vrt.be').get('vrtnu-site_profile_vt')
            id_token = cookie.value if cookie else None
        # NOTE(review): without an identity token an empty JSON object is sent;
        # this assumes the endpoint then issues an anonymous token -- confirm
        token_request = {'identityToken': id_token} if id_token else {}

        json_response = self._download_json(
            f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/tokens',
            None, 'Downloading player token', 'Failed to download player token',
            headers={'Content-Type': 'application/json'},
            data=json.dumps(token_request).encode())
        player_token = json_response['vrtPlayerToken']

        return self._download_json(
            f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/videos/{video_id}',
            video_id, 'Downloading API JSON', 'Failed to download API JSON', query={
                'vrtPlayerToken': player_token,
                'client': client,
            })
2023-06-02 18:29:00 +00:00
class VRTIE(VRTBaseIE):
    """Extractor for news/sport article pages matched by _VALID_URL."""
    IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
        'info_dict': {
            'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
            'ext': 'mp4',
            'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
            'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
            'duration': 31.2,
            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
        'info_dict': {
            'id': 'pbs-pub-e1d6e4ec-cbf4-451e-9e87-d835bb65cd28$vid-2ad45eb6-9bc8-40d4-ad72-5f25c0f59d75',
            'ext': 'mp4',
            'title': 'De Belgian Cats zijn klaar voor het EK',
            'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
            'duration': 115.17,
            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]
    _NETRC_MACHINE = 'vrtnu'
    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
    _CONTEXT_ID = 'R3595707040'
    _REST_API_BASE_TOKEN = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
    _REST_API_BASE_VIDEO = 'https://media-services-public.vrt.be/media-aggregator/v2'
    _HLS_ENTRY_PROTOCOLS_MAP = {
        'HLS': 'm3u8_native',
        'HLS_AES': 'm3u8_native',
    }
    # Fallback API client per matched `site` group, used by _real_extract when
    # the page does not carry a client attribute.
    # NOTE(review): _real_extract referenced this map but no visible class
    # defined it; the values below are believed to match upstream yt-dlp -- confirm
    _CLIENT_MAP = {
        'vrt.be/vrtnws': 'vrtnieuws',
        'sporza.be': 'sporza',
    }

    _authenticated = False

    def _perform_login(self, username, password):
        """Log in through Gigya and then the VRT login endpoints.

        Raises ExtractorError when Gigya reports an error, when a request
        fails with anything other than HTTP 401, or when all three login
        attempts are rejected with HTTP 401.
        """
        auth_info = self._gigya_login({
            'APIKey': self._APIKEY,
            'targetEnv': 'jssdk',
            'loginID': username,
            'password': password,
            'authMode': 'cookie',
        })

        if auth_info.get('errorDetails'):
            raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)

        # Sometimes authentication fails for no good reason, retry
        for _ in range(3):
            try:
                self._request_webpage(
                    'https://token.vrt.be/vrtnuinitlogin',
                    None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
                    query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})

                post_data = {
                    'UID': auth_info['UID'],
                    'UIDSignature': auth_info['UIDSignature'],
                    'signatureTimestamp': auth_info['signatureTimestamp'],
                    '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
                }

                self._request_webpage(
                    'https://login.vrt.be/perform_login',
                    None, note='Performing login', errnote='perform login failed',
                    headers={}, query={
                        'client_id': 'vrtnu-site'
                    }, data=urlencode_postdata(post_data))

            except ExtractorError as e:
                # Only a 401 is retried; everything else propagates unchanged
                if not (isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401):
                    raise
                self.report_warning('Authentication failed')
                self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
            else:
                break
        else:
            # Every attempt was rejected: do not pretend we are logged in
            raise ExtractorError('Unable to login: authentication failed after 3 attempts', expected=True)

        self._authenticated = True

    def _real_extract(self, url):
        site, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)
        # Video parameters are stored as data-* attributes on the 'vrtvideo' element
        attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')

        asset_id = attrs.get('data-video-id') or attrs['data-videoid']
        publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid')
        if publication_id:
            asset_id = f'{publication_id}${asset_id}'
        client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site]

        data = self._call_api(asset_id, client)
        formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)

        description = self._html_search_meta(
            ['og:description', 'twitter:description', 'description'], webpage)
        if description == '…':
            description = None

        return {
            'id': asset_id,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': url_or_none(attrs.get('data-posterimage')),
            'duration': float_or_none(attrs.get('data-duration'), 1000),
            '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
            # Values from the API response take precedence over the page attributes above
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('shortDescription', {str}),
                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
                'thumbnail': ('posterImageUrl', {url_or_none}),
            }),
        }
class VrtNUIE(VRTBaseIE):
    """Extractor for VRT MAX (formerly VRT NU) episode pages."""
    IE_DESC = 'VRT MAX'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(vrtmax|vrtnu)/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
    _TESTS = [{
        # CONTENT_IS_AGE_RESTRICTED
        'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
        'info_dict': {
            'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
            'ext': 'mp4',
            'title': 'Tom Waes',
            'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
            'timestamp': 1673905125,
            'release_timestamp': 1673905125,
            'series': 'De ideale wereld',
            'season_id': '1672830988794',
            'episode': 'Aflevering 1',
            'episode_number': 1,
            'episode_id': '1672830988861',
            'display_id': 'de-ideale-wereld-d20230116',
            'channel': 'VRT',
            'duration': 1939.0,
            'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
            'release_date': '20230116',
            'upload_date': '20230116',
            'age_limit': 12,
        },
    }, {
        'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
        'info_dict': {
            'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
            'ext': 'mp4',
            'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
            'description': 'md5:197424726c61384b4e5c519f16c0cf02',
            'timestamp': 1652940000,
            'release_timestamp': 1652940000,
            'series': 'Buurman, wat doet u nu?',
            'season': 'Seizoen 6',
            'season_number': 6,
            'season_id': '1652344200907',
            'episode': 'Aflevering 0',
            'episode_number': 0,
            'episode_id': '1652951873524',
            'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
            'channel': 'VRT',
            'duration': 33.13,
            'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
            'release_date': '20220519',
            'upload_date': '20220519',
        },
        'params': {'skip_download': 'm3u8'},
    }]
    _NETRC_MACHINE = 'vrtnu'
    _authenticated = False

    def _perform_login(self, username, password):
        """Log in with *username* / *password* via login.vrt.be.

        Raises ExtractorError when the OIDCXSRF cookie is missing or when
        one of the requests fails.
        """
        # Visiting the SSO page is what sets the cookies read below
        self._request_webpage(
            'https://www.vrt.be/vrtnu/sso/login', None,
            note='Getting session cookies', errnote='Failed to get session cookies')

        xsrf_cookie = self._get_cookies('https://login.vrt.be').get('OIDCXSRF')
        if not xsrf_cookie:
            # Fail with a readable message instead of AttributeError on None
            raise ExtractorError('Unable to log in: OIDCXSRF cookie was not set')

        self._download_json(
            'https://login.vrt.be/perform_login', None, data=json.dumps({
                'loginID': username,
                'password': password,
                'clientId': 'vrtnu-site',
            }).encode(), headers={
                'Content-Type': 'application/json',
                'Oidcxsrf': xsrf_cookie.value,
            }, note='Logging in', errnote='Login failed')

        self._authenticated = True

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page_path = urllib.parse.urlparse(url).path

        # 1. Obtain/refresh 'vrtnu-site_profile' tokens
        self._request_webpage(
            'https://www.vrt.be/vrtnu/sso/login', None,
            note='Getting tokens', errnote='Failed to get tokens')

        access_cookie = self._get_cookies('https://www.vrt.be').get('vrtnu-site_profile_at')
        if not access_cookie:
            # No access token cookie: the GraphQL call below cannot be authorized
            self.raise_login_required()

        # 2. Perform GraphQL query to obtain video metadata
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {access_cookie.value}',
        }

        data = {
            'operationName': 'VideoPage',
            'query': self._VIDEOPAGE_QUERY,
            'variables': {
                'pageId': f'{page_path.rstrip("/")}.model.json',
            },
        }

        metadata = self._download_json(
            'https://www.vrt.be/vrtnu-api/graphql/v1',
            display_id, 'Downloading asset JSON', 'Unable to download asset JSON',
            headers=headers, data=json.dumps(data).encode())['data']['page']

        video_id = metadata['episode']['watchAction']['streamId']
        # The second 'ldjson' entry is a JSON string carrying season/episode
        # IDs. It only feeds optional fields, so a missing or malformed
        # entry must not abort extraction.
        ld_json = traverse_obj(metadata, ('ldjson', 1, {json.loads})) or {}

        # 3. Obtain streaming info
        streaming_info = self._call_api(video_id, client='vrtnu-web@PROD')
        formats, subtitles = self._extract_formats_and_subtitles(streaming_info, video_id)

        return {
            **traverse_obj(metadata, {
                'title': ('seo', 'title', {str_or_none}),
                # The year of the air date is used as season number
                'season_number': ('episode', 'onTimeRaw', {parse_year}),
                'description': ('seo', 'description', {str_or_none}),
                'timestamp': ('episode', 'onTimeRaw', {parse_iso8601}),
                'release_timestamp': ('episode', 'onTimeRaw', {parse_iso8601}),
                'series': ('episode', 'program', 'title'),
                'episode': ('episode', 'episodeNumberRaw', {str_or_none}),
                'episode_number': ('episode', 'episodeNumberRaw', {int_or_none}),
                'age_limit': ('episode', 'ageRaw', {parse_age_limit}),
            }),
            **traverse_obj(ld_json, {
                'season': ('partOfSeason', 'name'),
                'season_id': ('partOfSeason', '@id'),
                'episode_id': ('@id', {str_or_none}),
            }),
            'id': video_id,
            # Previously mapped from episode.name through parse_age_limit,
            # which never yields a usable display ID; use the URL slug instead
            'display_id': display_id,
            'channel': 'VRT',
            'formats': formats,
            'duration': float_or_none(streaming_info.get('duration'), 1000),
            'thumbnail': url_or_none(streaming_info.get('posterImageUrl')),
            'subtitles': subtitles,
            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
        }
class KetnetIE(VRTBaseIE):
    """Extractor for ketnet.be video pages."""
    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
        'info_dict': {
            'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
            'ext': 'mp4',
            'title': 'Meisjes',
            'episode': 'Reeks 6: Week 5',
            'season': 'Reeks 6',
            'series': 'Meisjes',
            'timestamp': 1685251800,
            'upload_date': '20230528',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        # The page metadata is fetched through a GraphQL query keyed on the URL path
        video = self._download_json(
            'https://senior-bff.ketnet.be/graphql', display_id, query={
                'query': '''{
  video(id: "content/ketnet/nl/%s.model.json") {
    description
    episodeNr
    imageUrl
    mediaReference
    programTitle
    publicationDate
    seasonTitle
    subtitleVideodetail
    titleVideodetail
  }
}''' % display_id,
            })['data']['video']

        # 'mediaReference' is percent-encoded in the response
        video_id = urllib.parse.unquote(video['mediaReference'])
        data = self._call_api(video_id, 'ketnet@PROD', version='v1')
        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
            **traverse_obj(video, {
                'title': ('titleVideodetail', {str}),
                'description': ('description', {str}),
                # The query requests 'imageUrl'; no 'thumbnail' field is
                # requested, so reading 'thumbnail' never produced a value
                'thumbnail': ('imageUrl', {url_or_none}),
                'timestamp': ('publicationDate', {parse_iso8601}),
                'series': ('programTitle', {str}),
                'season': ('seasonTitle', {str}),
                'episode': ('subtitleVideodetail', {str}),
                'episode_number': ('episodeNr', {int_or_none}),
            }),
        }
class DagelijkseKostIE(VRTBaseIE):
    """Extractor for recipe pages on dagelijksekost.een.be."""
    IE_DESC = 'dagelijksekost.een.be'
    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
        'info_dict': {
            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
            'ext': 'mp4',
            'title': 'Hachis parmentier met witloof',
            'description': 'md5:9960478392d87f63567b5b117688cdc5',
            'display_id': 'hachis-parmentier-met-witloof',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The video ID is the value of the first quoted data-url attribute on the page
        video_id = self._html_search_regex(
            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')

        data = self._call_api(video_id, 'dako@prod', version='v1')
        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)

        # Title and description prefer the on-page elements and fall back to meta tags
        title = strip_or_none(get_element_by_class(
            'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage))
        description = clean_html(get_element_by_class(
            'dish-description', webpage)) or self._html_search_meta(
            ['description', 'twitter:description', 'og:description'], webpage)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'display_id': display_id,
            'title': title,
            'description': description,
            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
        }