[ie/Amazonminitv] extractor fix

This commit is contained in:
DEVENU 2024-07-21 12:08:56 +05:30 committed by GitHub
parent 7121179075
commit 7814598369
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -9,35 +9,28 @@ def _real_initialize(self):
self._download_webpage( self._download_webpage(
'https://www.amazon.in/minitv', None, 'https://www.amazon.in/minitv', None,
note='Fetching guest session cookies') note='Fetching guest session cookies')
AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value AmazonMiniTVBaseIE.urtk = self._get_cookies('https://www.amazon.in')['urtk'].value
def _call_api(self, asin, data=None, note=None): def _call_api(self, asin, data=None, note=None):
device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} query = {
'contentId':asin,
}
if data: if data:
data['variables'].update({ query.update(data)
'contentType': 'VOD',
'sessionIdToken': self.session_id,
**device,
})
resp = self._download_json( resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', f'https://www.amazon.in/minitv-pr/api/web/page/title',
asin, note=note, headers={ asin, note=note, headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'accounttype': 'NEW_GUEST_ACCOUNT',
'currentpageurl': '/', 'currentpageurl': '/',
'currentplatform': 'dWeb', 'currentplatform': 'dWeb',
}, data=json.dumps(data).encode() if data else None, }, data=None,
query=None if data else { query=query)
'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin,
**device,
})
if resp.get('errors'): if resp.get('errors'):
raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
elif not data:
return resp return resp
return resp['data'][data['operationName']]
class AmazonMiniTVIE(AmazonMiniTVBaseIE): class AmazonMiniTVIE(AmazonMiniTVBaseIE):
@ -89,79 +82,21 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_GRAPHQL_QUERY_CONTENT = '''
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
content(
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
contentId: $contentId
contentType: $contentType
) {
contentId
name
... on Episode {
contentId
vodType
name
images
description {
synopsis
contentLengthInSeconds
}
publicReleaseDateUTC
audioTracks
seasonId
seriesId
seriesName
seasonNumber
episodeNumber
timecode {
endCreditsTime
}
}
... on MovieContent {
contentId
vodType
name
description {
synopsis
contentLengthInSeconds
}
images
publicReleaseDateUTC
audioTracks
}
}
}'''
def _real_extract(self, url): def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}' asin = f'amzn1.dv.gti.{self._match_id(url)}'
prs = self._call_api(asin, note='Downloading playback info') prs = self._call_api(asin, note='Downloading playback info')
playback_info = prs['widgets'][0]['data']['playbackAssets']['manifestData']
title_info = prs['widgets'][0]['data']['contentDetails']
title_info_ = prs['metaData']['contentDetails']
formats, subtitles = [], {} formats, subtitles = [], {}
for type_, asset in prs['playbackAssets'].items(): for mpd in playback_info:
if not traverse_obj(asset, 'manifestUrl'):
continue
if type_ == 'hls':
m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=type_, fatal=False)
formats.extend(m3u8_fmts)
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif type_ == 'dash':
mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
asset['manifestUrl'], asin, mpd_id=type_, fatal=False) mpd['manifestURL'], asin, mpd_id=mpd['codec'], fatal=False)
formats.extend(mpd_fmts) formats.extend(mpd_fmts)
subtitles = self._merge_subtitles(subtitles, mpd_subs) subtitles = self._merge_subtitles(subtitles, mpd_subs)
else:
self.report_warning(f'Unknown asset type: {type_}')
title_info = self._call_api( credits_time = try_get(title_info, lambda x: x['skipData']['INTRO']['endTime'])
asin, note='Downloading title info', data={
'operationName': 'content',
'variables': {'contentId': asin},
'query': self._GRAPHQL_QUERY_CONTENT,
})
credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
is_episode = title_info.get('vodType') == 'EPISODE' is_episode = title_info.get('vodType') == 'EPISODE'
return { return {
@ -171,22 +106,25 @@ def _real_extract(self, url):
'subtitles': subtitles, 'subtitles': subtitles,
'language': traverse_obj(title_info, ('audioTracks', 0)), 'language': traverse_obj(title_info, ('audioTracks', 0)),
'thumbnails': [{ 'thumbnails': [{
'id': type_, 'id': "thumbnailImage",
'url': url, 'url': title_info.get('thumbnailImage'),
} for type_, url in (title_info.get('images') or {}).items()], },{
'description': traverse_obj(title_info, ('description', 'synopsis')), 'id':"seasonThumbnailImage",
'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), 'url': title_info.get('thumbnailImage')
}],
'description': traverse_obj(title_info_, ('description', 'synopsis')),
'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)),
'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
'chapters': [{ 'chapters': [{
'start_time': credits_time, 'start_time': credits_time,
'title': 'End Credits', 'title': 'End Credits',
}] if credits_time else [], }] if credits_time else [],
'series': title_info.get('seriesName'), 'series': title_info_.get('seasonName') if is_episode else None,
'series_id': title_info.get('seriesId'), 'series_id': title_info.get('seriesId') if is_episode else None,
'season_number': title_info.get('seasonNumber'), 'season_number': title_info.get('seasonNumber') if is_episode else None,
'season_id': title_info.get('seasonId'), 'season_id': title_info.get('seasonId') if is_episode else None,
'episode': title_info.get('name') if is_episode else None, 'episode': title_info.get('name') if is_episode else None,
'episode_number': title_info.get('episodeNumber'), 'episode_number': title_info.get('episodeNumber') if is_episode else None,
'episode_id': asin if is_episode else None, 'episode_id': asin if is_episode else None,
} }
@ -206,88 +144,17 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_GRAPHQL_QUERY = '''
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
getEpisodes(
applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
episodeOrSeasonId: $episodeOrSeasonId
) {
episodes {
... on Episode {
contentId
name
images
seriesName
seasonId
seriesId
seasonNumber
episodeNumber
description {
synopsis
contentLengthInSeconds
}
publicReleaseDateUTC
}
}
}
}
'''
def _entries(self, asin): def _entries(self, asin):
season_info = self._call_api( season_info = self._call_api(
asin, note='Downloading season info', data={ asin, note='Downloading season info',
'operationName': 'getEpisodes', data={'cursor': '8e0cefec-e190-46ba-854d-1f3ca7978b4a:::'}
'variables': {'episodeOrSeasonId': asin}, )
'query': self._GRAPHQL_QUERY,
})
for episode in season_info['episodes']: for season in season_info['widgets'][0]['data']['options']:
if season['active']:
for episode in season['value']['data']['widgets'][0]['data']['widgets']:
yield self.url_result( yield self.url_result(
f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) f'amazonminitv:{episode["data"]["contentId"]}', AmazonMiniTVIE, episode['data']['contentId'])
def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}'
return self.playlist_result(self._entries(asin), asin)
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
IE_NAME = 'amazonminitv:series'
_VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
_TESTS = [{
'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
'playlist_mincount': 3,
'info_dict': {
'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
},
}, {
'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
'only_matching': True,
}]
_GRAPHQL_QUERY = '''
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
getSeasons(
applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
) {
seasons {
seasonId
}
}
}
'''
def _entries(self, asin):
season_info = self._call_api(
asin, note='Downloading series info', data={
'operationName': 'getSeasons',
'variables': {'episodeOrSeasonOrSeriesId': asin},
'query': self._GRAPHQL_QUERY,
})
for season in season_info['seasons']:
yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
def _real_extract(self, url): def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}' asin = f'amzn1.dv.gti.{self._match_id(url)}'