mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 06:27:21 +00:00
[ie/nfl] Fix extractors
Authored by: bashonly
This commit is contained in:
parent
5bc5fb2835
commit
860e02cadb
|
@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
|
||||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
|
||||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
|
||||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '899441',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
|
||||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
|
||||||
'upload_date': '20201215',
|
|
||||||
'timestamp': 1608009755,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'uploader': 'NFL',
|
|
||||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
|
||||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
|
||||||
'duration': 157,
|
|
||||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||||
|
@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
|
||||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _generate_nfl_token(self, anvack, mcp_id):
|
|
||||||
reroute = self._download_json(
|
|
||||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
|
||||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
|
||||||
token_type = reroute.get('token_type') or 'Bearer'
|
|
||||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
|
||||||
response = self._download_json(
|
|
||||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
|
||||||
'query': '''{
|
|
||||||
viewer {
|
|
||||||
mediaToken(anvack: "%s", id: %s) {
|
|
||||||
token
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}''' % (anvack, mcp_id), # noqa: UP031
|
|
||||||
}).encode(), headers={
|
|
||||||
'Authorization': auth_token,
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
}, note='Fetching NFL API token')
|
|
||||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
|
||||||
|
|
||||||
_TOKEN_GENERATORS = {
|
|
||||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _server_time(self, access_key, video_id):
|
def _server_time(self, access_key, video_id):
|
||||||
return int_or_none(traverse_obj(self._download_json(
|
return int_or_none(traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||||
|
@ -290,8 +247,6 @@ def _get_video_json(self, access_key, video_id, extracted_token):
|
||||||
}
|
}
|
||||||
if extracted_token is not None:
|
if extracted_token is not None:
|
||||||
api['anvstk2'] = extracted_token
|
api['anvstk2'] = extracted_token
|
||||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
|
||||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
|
||||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -11,9 +11,12 @@
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
traverse_obj,
|
int_or_none,
|
||||||
|
make_archive_id,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class NFLBaseIE(InfoExtractor):
|
class NFLBaseIE(InfoExtractor):
|
||||||
|
@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
|
||||||
'osVersion': '10.0',
|
'osVersion': '10.0',
|
||||||
}, separators=(',', ':')).encode()).decode(),
|
}, separators=(',', ':')).encode()).decode(),
|
||||||
'networkType': 'other',
|
'networkType': 'other',
|
||||||
'nflClaimGroupsToAdd': [],
|
'peacockUUID': 'undefined',
|
||||||
'nflClaimGroupsToRemove': [],
|
|
||||||
}
|
}
|
||||||
_ACCOUNT_INFO = {}
|
_ACCOUNT_INFO = {}
|
||||||
_API_KEY = None
|
_API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||||
|
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
_TOKEN_EXPIRY = 0
|
_TOKEN_EXPIRY = 0
|
||||||
|
|
||||||
def _get_account_info(self, url, slug):
|
def _get_account_info(self):
|
||||||
if not self._API_KEY:
|
|
||||||
webpage = self._download_webpage(url, slug, fatal=False) or ''
|
|
||||||
self._API_KEY = self._search_regex(
|
|
||||||
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
|
|
||||||
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
|
||||||
|
|
||||||
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
||||||
login_token = traverse_obj(cookies, (
|
login_token = traverse_obj(cookies, (
|
||||||
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
||||||
|
@ -103,7 +99,7 @@ def _get_account_info(self, url, slug):
|
||||||
'or else try using --cookies-from-browser instead', expected=True)
|
'or else try using --cookies-from-browser instead', expected=True)
|
||||||
|
|
||||||
account = self._download_json(
|
account = self._download_json(
|
||||||
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
|
'https://auth-id.nfl.com/accounts.getAccountInfo', None,
|
||||||
note='Downloading account info', data=urlencode_postdata({
|
note='Downloading account info', data=urlencode_postdata({
|
||||||
'include': 'profile,data',
|
'include': 'profile,data',
|
||||||
'lang': 'en',
|
'lang': 'en',
|
||||||
|
@ -111,7 +107,7 @@ def _get_account_info(self, url, slug):
|
||||||
'sdk': 'js_latest',
|
'sdk': 'js_latest',
|
||||||
'login_token': login_token,
|
'login_token': login_token,
|
||||||
'authMode': 'cookie',
|
'authMode': 'cookie',
|
||||||
'pageURL': url,
|
'pageURL': 'https://www.nfl.com/',
|
||||||
'sdkBuild': traverse_obj(cookies, (
|
'sdkBuild': traverse_obj(cookies, (
|
||||||
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
|
@ -126,30 +122,50 @@ def _get_account_info(self, url, slug):
|
||||||
if len(self._ACCOUNT_INFO) != 3:
|
if len(self._ACCOUNT_INFO) != 3:
|
||||||
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
||||||
|
|
||||||
def _get_auth_token(self, url, slug):
|
def _get_auth_token(self):
|
||||||
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self._ACCOUNT_INFO:
|
|
||||||
self._get_account_info(url, slug)
|
|
||||||
|
|
||||||
token = self._download_json(
|
token = self._download_json(
|
||||||
'https://api.nfl.com/identity/v3/token%s' % (
|
'https://api.nfl.com/identity/v3/token%s' % (
|
||||||
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
||||||
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||||
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
||||||
|
|
||||||
self._TOKEN = token['accessToken']
|
self._TOKEN = token['accessToken']
|
||||||
self._TOKEN_EXPIRY = token['expiresIn']
|
self._TOKEN_EXPIRY = token['expiresIn']
|
||||||
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
||||||
|
|
||||||
|
def _extract_video(self, mcp_id):
|
||||||
|
self._get_auth_token()
|
||||||
|
data = self._download_json(
|
||||||
|
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
|
||||||
|
'Authorization': f'Bearer {self._TOKEN}',
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
}, data=json.dumps({'init': True}, separators=(',', ':')).encode())
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': mcp_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
|
||||||
|
**traverse_obj(data, ('metadata', {
|
||||||
|
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
|
||||||
|
'description': ('event', 'def_description', {str}),
|
||||||
|
'duration': ('event', 'duration', {int_or_none}),
|
||||||
|
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
|
||||||
def _parse_video_config(self, video_config, display_id):
|
def _parse_video_config(self, video_config, display_id):
|
||||||
video_config = self._parse_json(video_config, display_id)
|
video_config = self._parse_json(video_config, display_id)
|
||||||
item = video_config['playlist'][0]
|
item = video_config['playlist'][0]
|
||||||
mcp_id = item.get('mcpID')
|
if mcp_id := item.get('mcpID'):
|
||||||
if mcp_id:
|
return self._extract_video(mcp_id)
|
||||||
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
|
|
||||||
else:
|
|
||||||
media_id = item.get('id') or item['entityId']
|
media_id = item.get('id') or item['entityId']
|
||||||
title = item.get('title')
|
title = item.get('title')
|
||||||
item_url = item['url']
|
item_url = item['url']
|
||||||
|
@ -188,24 +204,19 @@ class NFLIE(NFLBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||||
'upload_date': '20201215',
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
'timestamp': 1608009755,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'NFL',
|
|
||||||
'tags': 'count:6',
|
|
||||||
'duration': 157,
|
'duration': 157,
|
||||||
'categories': 'count:3',
|
'_old_archive_ids': ['anvato 899441'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||||
'md5': '6886b32c24b463038c760ceb55a34566',
|
'md5': '92a517f05bd3eb50fe50244bc621aec8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||||
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||||
},
|
},
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -236,13 +247,16 @@ class NFLArticleIE(NFLBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
entries = []
|
|
||||||
|
def entries():
|
||||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||||
entries.append(self._parse_video_config(video_config, display_id))
|
yield self._parse_video_config(video_config, display_id)
|
||||||
|
|
||||||
title = clean_html(get_element_by_class(
|
title = clean_html(get_element_by_class(
|
||||||
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||||
['og:title', 'twitter:title'], webpage)
|
['og:title', 'twitter:title'], webpage)
|
||||||
return self.playlist_result(entries, display_id, title)
|
|
||||||
|
return self.playlist_result(entries(), display_id, title)
|
||||||
|
|
||||||
|
|
||||||
class NFLPlusReplayIE(NFLBaseIE):
|
class NFLPlusReplayIE(NFLBaseIE):
|
||||||
|
@ -307,6 +321,9 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||||
'all_22': 'All-22',
|
'all_22': 'All-22',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._get_account_info()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||||
requested_types = self._configuration_arg('type', ['all'])
|
requested_types = self._configuration_arg('type', ['all'])
|
||||||
|
@ -315,7 +332,7 @@ def _real_extract(self, url):
|
||||||
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
self._get_auth_token(url, slug)
|
self._get_auth_token()
|
||||||
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
||||||
game_id = self._download_json(
|
game_id = self._download_json(
|
||||||
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
||||||
|
@ -328,14 +345,13 @@ def _real_extract(self, url):
|
||||||
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
||||||
|
|
||||||
if video_id:
|
if video_id:
|
||||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for replay in traverse_obj(
|
for replay in traverse_obj(
|
||||||
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
||||||
):
|
):
|
||||||
video_id = replay['mcpPlaybackId']
|
yield self._extract_video(replay['mcpPlaybackId'])
|
||||||
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
|
||||||
|
|
||||||
return self.playlist_result(entries(), slug)
|
return self.playlist_result(entries(), slug)
|
||||||
|
|
||||||
|
@ -362,12 +378,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._get_account_info()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug = self._match_id(url)
|
slug = self._match_id(url)
|
||||||
self._get_auth_token(url, slug)
|
self._get_auth_token()
|
||||||
video_id = self._download_json(
|
video_id = self._download_json(
|
||||||
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
||||||
'Authorization': f'Bearer {self._TOKEN}',
|
'Authorization': f'Bearer {self._TOKEN}',
|
||||||
})['mcpPlaybackId']
|
})['mcpPlaybackId']
|
||||||
|
|
||||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
Loading…
Reference in a new issue