From f41b949a2ef646fbc36375febbe3f0c19d742c0f Mon Sep 17 00:00:00 2001 From: Daniel Rich Date: Thu, 1 Jun 2023 14:52:03 -0700 Subject: [PATCH] [extractor/nhk] Fix API extraction (#7180) Closes #6992 Authored by: sjthespian, menschel Co-authored-by: Patrick Menschel --- yt_dlp/extractor/nhk.py | 36 ++++++++++++++++++++++++++++-------- yt_dlp/extractor/piksel.py | 16 +++++++++------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 1597962ac..a3efa326a 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -67,7 +67,7 @@ def get_clean_field(key): info.update({ '_type': 'url_transparent', 'ie_key': 'Piksel', - 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id, + 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, 'id': vod_id, }) else: @@ -94,6 +94,19 @@ class NhkVodIE(NhkBaseIE): # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2061601/', + 'info_dict': { + 'id': 'yd8322ch', + 'ext': 'mp4', + 'description': 'md5:109c8b05d67a62d0592f2b445d2cd898', + 'title': 'GRAND SUMO Highlights - [Recap] May Tournament Day 1 (Opening Day)', + 'upload_date': '20230514', + 'timestamp': 1684083791, + 'series': 'GRAND SUMO Highlights', + 'episode': '[Recap] May Tournament Day 1 (Opening Day)', + 'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1684084443/4028649.jpg?w=1920&h=1080', + }, + }, { # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'md5': '7a90abcfe610ec22a6bfe15bd46b30ca', @@ -104,6 +117,9 @@ class NhkVodIE(NhkBaseIE): 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5', 'timestamp': 1565965194, 'upload_date': '20190816', + 'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1567086278/3715195.jpg?w=1920&h=1080', + 'series': 'Dining with the Chef', + 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU', }, }, { # audio clip @@ -114,10 +130,7 @@ class NhkVodIE(NhkBaseIE): 'title': "Japan's Top Inventions - Miniature Video Cameras", 'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'only_matching': True, @@ -133,7 +146,6 @@ class NhkVodIE(NhkBaseIE): }, { # video, alphabetic character in ID #29670 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/', - 'only_matching': True, 'info_dict': { 'id': 'qfjay6cg', 'ext': 'mp4', @@ -142,7 +154,8 @@ class NhkVodIE(NhkBaseIE): 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$', 'upload_date': '20210615', 'timestamp': 1623722008, - } + }, + 'skip': '404 Not Found', }] def _real_extract(self, url): @@ -153,12 +166,19 @@ class NhkVodProgramIE(NhkBaseIE): _VALID_URL = r'%s/program%s(?P[0-9a-z]+)(?:.+?\btype=(?Pclip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) _TESTS = [{ # video program episodes + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo', + 'info_dict': { + 'id': 'sumo', + 'title': 'GRAND SUMO Highlights', + }, + 'playlist_mincount': 12, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', 'info_dict': { 'id': 'japanrailway', 'title': 'Japan Railway Journal', }, - 'playlist_mincount': 1, + 'playlist_mincount': 12, }, { # video program clips 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip', diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index cc60b304e..97a9bf574 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -7,8 +7,10 @@ int_or_none, join_nonempty, parse_iso8601, + traverse_obj, try_get, unescapeHTML, + urljoin, ) @@ -63,11 +65,11 @@ class PikselIE(InfoExtractor): } ] - def _call_api(self, app_token, resource, display_id, query, fatal=True): - response = (self._download_json( - 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), - display_id, query=query, fatal=fatal) or {}).get('response') - failure = try_get(response, lambda x: x['failure']['reason']) + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5') + response = traverse_obj( + self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {} + failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API' if failure: if fatal: raise ExtractorError(failure, expected=True) @@ -83,7 +85,7 @@ def _real_extract(self, url): ], webpage, 'app token') query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} program = self._call_api( - app_token, 'program', display_id, query)['WsProgramResponse']['program'] + app_token, 'program', display_id, query, url)['WsProgramResponse']['program'] video_id = program['uuid'] video_data = program['asset'] title = video_data['title'] @@ -129,7 +131,7 @@ def process_asset_files(asset_files): process_asset_files(try_get(self._call_api( app_token, 'asset_file', display_id, { 'assetid': asset_id, - }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) + }, url, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) m3u8_url = dict_get(video_data, [ 'm3u8iPadURL',