From 8cd69fc40786d081b5523f9dc20861c130a2843d Mon Sep 17 00:00:00 2001 From: Jules-A Date: Sat, 9 Oct 2021 23:21:41 +0800 Subject: [PATCH] [Funimation] Fix for /v/ urls (#1196) Closes #993 Authored by: pukkandan, Jules-A --- yt_dlp/extractor/funimation.py | 133 ++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index ede53b326..382cbe159 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -2,26 +2,61 @@ from __future__ import unicode_literals import random +import re import string from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( determine_ext, - dict_get, int_or_none, js_to_json, - str_or_none, - try_get, + orderedSet, qualities, + str_or_none, + traverse_obj, + try_get, urlencode_postdata, ExtractorError, ) -class FunimationPageIE(InfoExtractor): +class FunimationBaseIE(InfoExtractor): + _NETRC_MACHINE = 'funimation' + _REGION = None + _TOKEN = None + + def _get_region(self): + region_cookie = self._get_cookies('https://www.funimation.com').get('region') + region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country') + return region or traverse_obj( + self._download_json( + 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, + note='Checking geo-location', errnote='Unable to fetch geo-location information'), + 'region') or 'US' + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + try: + data = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', + None, 'Logging in', data=urlencode_postdata({ + 'username': username, + 'password': password, + })) + return data['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['error'] + raise ExtractorError(error, expected=True) + raise + + +class FunimationPageIE(FunimationBaseIE): IE_NAME = 'funimation:page' - _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk))/(?P[^/]+/)?(?Pshows/(?P[^/]+/[^/?#&]+).*$)' + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P[^/]+)/)?(?:shows|v)/(?P[^/]+)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', @@ -46,38 +81,34 @@ class FunimationPageIE(InfoExtractor): }, { 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'only_matching': True, + }, { + 'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5', + 'only_matching': True, }] + def _real_initialize(self): + if not self._REGION: + FunimationBaseIE._REGION = self._get_region() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() + def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('id').replace('/', '_') - if not mobj.group('lang'): - url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path')) + locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode') - webpage = self._download_webpage(url, display_id) - title_data = self._parse_json(self._search_regex( - r'TITLE_DATA\s*=\s*({[^}]+})', - webpage, 'title data', default=''), - display_id, js_to_json, fatal=False) or {} + video_id = traverse_obj(self._download_json( + f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}', + f'{show}_{episode}', query={ + 'deviceType': 'web', + 'region': self._REGION, + 'locale': locale or 'en' + }), ('videoList', ..., 'id'), get_all=False) - video_id = ( - title_data.get('id') - or self._search_regex( - (r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", r']+src="/player/(\d+)'), - webpage, 'video_id', default=None) - or self._search_regex( - r'/player/(\d+)', - self._html_search_meta(['al:web:url', 'og:video:url', 'og:video:secure_url'], webpage, fatal=True), - 'video id')) return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id) -class FunimationIE(InfoExtractor): +class FunimationIE(FunimationBaseIE): _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P\d+)' - _NETRC_MACHINE = 'funimation' - _TOKEN = None - _TESTS = [{ 'url': 'https://www.funimation.com/player/210051', 'info_dict': { @@ -93,7 +124,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -114,7 +145,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -122,26 +153,9 @@ class FunimationIE(InfoExtractor): }, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return - try: - data = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - })) - self._TOKEN = data['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] - raise ExtractorError(error, expected=True) - raise - def _real_initialize(self): - self._login() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() @staticmethod def _get_experiences(episode): @@ -283,7 +297,7 @@ def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_n return subtitles -class FunimationShowIE(FunimationIE): +class FunimationShowIE(FunimationBaseIE): IE_NAME = 'funimation:show' _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P[^/]+)?/?shows/(?P[^/?#&]+))/?(?:[?#]|$)' @@ -311,31 +325,28 @@ class FunimationShowIE(FunimationIE): }] def _real_initialize(self): - region = self._get_cookies('https://www.funimation.com').get('region') - self._region = region.value if region else try_get( - self._download_json( - 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, - note='Checking geo-location', errnote='Unable to fetch geo-location information'), - lambda x: x['region']) or 'US' + if not self._REGION: + FunimationBaseIE._REGION = self._get_region() def _real_extract(self, url): base_url, locale, display_id = self._match_valid_url(url).groups() show_info = self._download_json( 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s' - % (display_id, self._region, locale or 'en'), display_id) - items = self._download_json( + % (display_id, self._REGION, locale or 'en'), display_id) + items_info = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s' - % show_info.get('id'), display_id).get('items') - vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items) + % show_info.get('id'), display_id) + + vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item')) return { '_type': 'playlist', 'id': show_info['id'], 'title': show_info['name'], - 'entries': [ + 'entries': orderedSet( self.url_result( '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(), vod_item.get('episodeId'), vod_item.get('episodeName')) - for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))], + for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))), }