Download works

This commit is contained in:
bergoid 2023-09-17 23:17:58 +02:00
parent c96f4429f3
commit deeb4857bd

View file

@ -4,9 +4,6 @@
from http.cookiejar import CookieJar
import urllib.parse
import urllib.request
##################
import sys
##################
from .gigya import GigyaBaseIE
from ..networking.exceptions import HTTPError
@ -33,6 +30,7 @@
class VRTBaseIE(GigyaBaseIE):
_GEO_BYPASS = False
_PLAYER_INFO = {
'platform': 'desktop',
'app': {
@ -49,6 +47,9 @@ class VRTBaseIE(GigyaBaseIE):
'version': '3.2.6-prod-2023-09-11T12:37:41'
}
}
_VIDEOPAGE_QUERY = "query VideoPage($pageId: ID!) {\n page(id: $pageId) {\n ... on EpisodePage {\n id\n title\n permalink\n seo {\n ...seoFragment\n __typename\n }\n socialSharing {\n ...socialSharingFragment\n __typename\n }\n trackingData {\n data\n perTrigger {\n trigger\n data\n template {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ldjson\n components {\n __typename\n ... on IComponent {\n componentType\n __typename\n }\n }\n episode {\n id\n title\n available\n whatsonId\n brand\n brandLogos {\n type\n width\n height\n primary\n mono\n __typename\n }\n logo\n primaryMeta {\n ...metaFragment\n __typename\n }\n secondaryMeta {\n ...metaFragment\n __typename\n }\n image {\n ...imageFragment\n __typename\n }\n durationRaw\n durationValue\n durationSeconds\n onTimeRaw\n offTimeRaw\n ageRaw\n regionRaw\n announcementValue\n name\n episodeNumberRaw\n episodeNumberValue\n subtitle\n richDescription {\n __typename\n html\n }\n program {\n id\n link\n title\n __typename\n }\n watchAction {\n streamId\n videoId\n episodeId\n avodUrl\n resumePoint\n __typename\n }\n shareAction {\n title\n description\n image {\n templateUrl\n __typename\n }\n url\n __typename\n }\n favoriteAction {\n id\n title\n favorite\n programWhatsonId\n programUrl\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\nfragment metaFragment on MetaDataItem {\n __typename\n type\n value\n shortValue\n longValue\n}\nfragment imageFragment on Image {\n objectId\n id: objectId\n alt\n title\n focalPoint\n templateUrl\n}\nfragment seoFragment on SeoProperties {\n __typename\n title\n description\n}\nfragment socialSharingFragment on SocialSharingProperties {\n __typename\n title\n description\n image {\n __typename\n id: objectId\n templateUrl\n }\n}"
# From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
_JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
_JWT_SIGNING_KEY = '2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae'
@ -95,17 +96,24 @@ def _extract_formats_and_subtitles(self, data, video_id):
def _call_api(self, video_id, client='null', id_token=None, version='v2'):
player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
player_token = self._download_json(
'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
video_id, 'Downloading player token', headers={
**self.geo_verification_headers(),
'Content-Type': 'application/json',
}, data=json.dumps({
'identityToken': id_token or {},
'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
player_info_jwt = jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
'kid': self._JWT_KEY_ID
}).decode()
}, separators=(',', ':')).encode())['vrtPlayerToken']
headers = {
**self.geo_verification_headers(),
'Content-Type': 'application/json',
}
data = {
'identityToken': id_token or self._cookies['vrtnu-site_profile_vt'],
'playerInfo': player_info_jwt
}
json_response = self._download_json(
'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
None, 'Downloading player token', headers=headers, data=json.dumps(data).encode())
player_token = json_response['vrtPlayerToken']
return self._download_json(
f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
@ -154,7 +162,6 @@ class VRTIE(VRTBaseIE):
}
_authenticated = False
_id_token = ''
def _perform_login(self, username, password):
auth_info = self._gigya_login({
@ -312,19 +319,11 @@ class VrtNUIE(VRTBaseIE):
}]
_NETRC_MACHINE = 'vrtnu'
_authenticated = False
# def _extract_cookies(self, res):
# cookies_nvp = [header_value.split(';')[0] for header_value in res.headers.get_all('Set-Cookie')]
# return {name: value for nvp in cookies_nvp for name, value in [nvp.split('=')]}
#
# def _create_cookie_header(self, cookies):
# return { 'Cookie': '; '.join([f'{key}={value}' for key, value in cookies.items()]) }
_cookies = CookiePot()
def _perform_login(self, username, password):
cookies = CookiePot()
# TODO:
# 1. Does the _request_webpage() respect this opener too?
# 2. If so:
@ -335,50 +334,26 @@ def _perform_login(self, username, password):
# Disable automatic redirection to be able to
# grab necessary info in intermediate step
opener= urllib.request.build_opener(NoRedirect,urllib.request.HTTPCookieProcessor(cookies))
opener= urllib.request.build_opener(NoRedirect,urllib.request.HTTPCookieProcessor(self._cookies))
urllib.request.install_opener(opener)
# 1.a Visit 'login' URL. Get 'authorize' location and 'oidcstate' cookie
res = urllib.request.urlopen('https://www.vrt.be/vrtnu/sso/login', None)
auth_url = res.headers.get_all('Location')[0]
# print("===================================")
# print('login')
# print(res.status)
# print(res.headers.get_all('Location')[0])
# # for cookie in cookies:
# # print(f'{cookie.name}={cookie.value}')
# print("===================================")
# 1.b Follow redirection: visit 'authorize' URL. Get OIDCXSRF & SESSION cookies
res = urllib.request.urlopen(auth_url, None)
cookies_header = f'OIDCXSRF={cookies["OIDCXSRF"]}; SESSION={cookies["SESSION"]}'
# print("===================================")
# print('authorize')
# print(res.status)
# print(cookies)
# print("===================================")
# sys.exit(0)
cookies_header = f'OIDCXSRF={self._cookies["OIDCXSRF"]}; SESSION={self._cookies["SESSION"]}'
# 2. Perform login
headers = {
'Content-Type': 'application/json',
'Oidcxsrf': cookies["OIDCXSRF"],
'Oidcxsrf': self._cookies["OIDCXSRF"],
'Cookie': cookies_header
}
post_data = { "loginID": f"{username}", "password": f"{password}", "clientId": "vrtnu-site" }
res = self._request_webpage('https://login.vrt.be/perform_login', None, note='Performing login', errnote='Login failed', fatal=True, data=json.dumps(post_data).encode(), headers=headers)
# print("===================================")
# print('perform_login')
# print(res.status)
# print("===================================")
# sys.exit(0)
# TODO:
# . re-enable auto redir here and do step 3 in one urlopen() call?
# . should this step be the new "refreshtoken" in _real_extract?
@ -392,144 +367,71 @@ def _perform_login(self, username, password):
res = urllib.request.urlopen(request, None)
callback_url = res.headers.get_all('Location')[0]
# print("===================================")
# print('authorize')
# print(res.status)
# print(res.headers.get_all('Location')[0])
# # print(cookies)
# # print(json.dumps(tokens))
# print("===================================")
# 3.b Visit 'callback'
headers = {
'Cookie': f'oidcstate={cookies["oidcstate"]}'
'Cookie': f'oidcstate={self._cookies["oidcstate"]}'
}
request = urllib.request.Request(callback_url, headers=headers)
res = urllib.request.urlopen(request, None)
_id_token = cookies['vrtnu-site_profile_vt']
# print("===================================")
# print('callback')
# print(res.status)
# print(res.headers)
# print(cookies)
# print(json.dumps(tokens))
# for cookie in cookies:
# print(f'{cookie.name}={cookie.value}')
print("===================================")
# sys.exit(0)
# 4. Obtain vrtPlayerToken
# TODO: make this a constant at the top
#########################################
player_info_base = {
'platform': 'desktop',
'app': {
'type': 'browser',
'name': 'Chrome'
},
'device': 'undefined (undefined)',
'os': {
'name': 'Windows',
'version': 'x86_64'
},
'player': {
'name': 'VRT web player',
'version': '3.2.6-prod-2023-09-11T12:37:41'
}
}
#########################################
# TODO: should move to _call_api()
player_info = {'exp': (round(time.time(), 3) + 900), **player_info_base}
player_info_jwt = jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
'kid': self._JWT_KEY_ID
}).decode()
headers = {
**self.geo_verification_headers(),
'Content-Type': 'application/json',
}
data = {
'identityToken': _id_token,
'playerInfo': player_info_jwt
}
json_response = self._download_json(
'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
None, 'Downloading player token', headers=headers, data=json.dumps(data).encode())
print("===================================")
print(json.dumps(json_response))
print("===================================")
sys.exit(0)
self._authenticated = True
def _real_extract(self, url):
display_id = self._match_id(url)
parsed_url = urllib.parse.urlparse(url)
print(f'Model JSON URL: {parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json')
details = self._download_json(
f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
print(f'pageId: {parsed_url.path.rstrip("/")}.model.json')
watch_info = traverse_obj(details, (
'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {}
video_id = join_nonempty(
'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info)
if '$' not in video_id:
raise ExtractorError('Unable to extract video ID')
headers = {
'Origin': 'https://www.vrt.be',
'Referer': f'{url}',
'Content-Type': 'application/json',
'Authorization': f'Bearer {self._cookies["vrtnu-site_profile_at"]}'
}
# vrtnutoken = self._download_json(
# 'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken',
# errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None
data = {
'operationName': 'VideoPage',
'query': self._VIDEOPAGE_QUERY ,
'variables': {
'pageId': f'{parsed_url.path.rstrip("/")}.model.json'
}
}
video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken)
model_json = self._download_json(
'https://www.vrt.be/vrtnu-api/graphql/v1',
display_id, 'Downloading asset JSON', 'Unable to download asset JSON', headers=headers, data=json.dumps(data).encode())['data']['page']
if 'title' not in video_info:
code = video_info.get('code')
if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
self.raise_login_required(code, method='password')
elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
self.raise_geo_restricted(countries=['BE'])
elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
if not self._authenticated:
self.raise_login_required(code, method='password')
self.raise_geo_restricted(countries=['BE'])
raise ExtractorError(code, expected=True)
video_id = model_json['episode']['watchAction']['streamId']
title = model_json['seo']['title']
season_number = int(model_json['episode']['onTimeRaw'][:4])
ld_json = json.loads(model_json['ldjson'][1])
formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id)
streaming_json = self._call_api(video_id, client='vrtnu-web@PROD')
formats, subtitles = self._extract_formats_and_subtitles(streaming_json, video_id)
return {
**traverse_obj(details, {
'title': 'title',
'description': ('description', {clean_html}),
'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
'series': ('data', 'program', 'title'),
'season': ('data', 'season', 'title', 'value'),
'season_number': ('data', 'season', 'title', 'raw', {int_or_none}),
'season_id': ('data', 'season', 'id', {str_or_none}),
'episode': ('data', 'episode', 'number', 'value', {str_or_none}),
'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}),
'episode_id': ('data', 'episode', 'id', {str_or_none}),
'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}),
**traverse_obj(model_json, {
'description': ('seo', 'description', {clean_html}),
'timestamp': ( 'episode', 'onTimeRaw', {parse_iso8601}),
'release_timestamp': ( 'episode', 'onTimeRaw', {parse_iso8601}),
'series': ('episode', 'program', 'title'),
'episode': ('episode', 'episodeNumberRaw', {str_or_none}),
'episode_number': ('episode', 'episodeNumberRaw', {int_or_none}),
'age_limit': ('episode', 'ageRaw', {parse_age_limit}),
'display_id': ('episode', 'name', {parse_age_limit}),
}),
**traverse_obj(ld_json, {
'season': ('partOfSeason', 'name'),
'season_id': ('partOfSeason', '@id'),
'episode_id': ('@id', {str_or_none}),
}),
'title': title,
'season_number': season_number,
'id': video_id,
'display_id': display_id,
'channel': 'VRT',
'formats': formats,
'duration': float_or_none(video_info.get('duration'), 1000),
'thumbnail': url_or_none(video_info.get('posterImageUrl')),
'duration': float_or_none(streaming_json.get('duration'), 1000),
'thumbnail': url_or_none(streaming_json.get('posterImageUrl')),
'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('Canvas', video_id)],
}