From 781459836929b9a6dc85659bc4da2fd63c49c8ec Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Sun, 21 Jul 2024 12:08:56 +0530 Subject: [PATCH] [ie/Amazonminitv] extractor fix --- yt_dlp/extractor/amazonminitv.py | 213 ++++++------------------------- 1 file changed, 40 insertions(+), 173 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 0590a344a..9e68582af 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -9,35 +9,28 @@ def _real_initialize(self): self._download_webpage( 'https://www.amazon.in/minitv', None, note='Fetching guest session cookies') - AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + AmazonMiniTVBaseIE.urtk = self._get_cookies('https://www.amazon.in')['urtk'].value def _call_api(self, asin, data=None, note=None): - device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} + query = { + 'contentId':asin, + } if data: - data['variables'].update({ - 'contentType': 'VOD', - 'sessionIdToken': self.session_id, - **device, - }) + query.update(data) resp = self._download_json( - f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', + f'https://www.amazon.in/minitv-pr/api/web/page/title', asin, note=note, headers={ 'Content-Type': 'application/json', + 'accounttype': 'NEW_GUEST_ACCOUNT', 'currentpageurl': '/', 'currentplatform': 'dWeb', - }, data=json.dumps(data).encode() if data else None, - query=None if data else { - 'deviceType': 'A1WMMUXPCUJL4N', - 'contentId': asin, - **device, - }) + }, data=None, + query=query) if resp.get('errors'): raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') - elif not data: - return resp - return resp['data'][data['operationName']] + return resp class AmazonMiniTVIE(AmazonMiniTVBaseIE): @@ -89,79 +82,21 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): 'only_matching': True, }] - _GRAPHQL_QUERY_CONTENT = ''' -query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { - content( - applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} - contentId: $contentId - contentType: $contentType - ) { - contentId - name - ... on Episode { - contentId - vodType - name - images - description { - synopsis - contentLengthInSeconds - } - publicReleaseDateUTC - audioTracks - seasonId - seriesId - seriesName - seasonNumber - episodeNumber - timecode { - endCreditsTime - } - } - ... on MovieContent { - contentId - vodType - name - description { - synopsis - contentLengthInSeconds - } - images - publicReleaseDateUTC - audioTracks - } - } -}''' - def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' prs = self._call_api(asin, note='Downloading playback info') + playback_info = prs['widgets'][0]['data']['playbackAssets']['manifestData'] + title_info = prs['widgets'][0]['data']['contentDetails'] + title_info_ = prs['metaData']['contentDetails'] formats, subtitles = [], {} - for type_, asset in prs['playbackAssets'].items(): - if not traverse_obj(asset, 'manifestUrl'): - continue - if type_ == 'hls': - m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( - asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=type_, fatal=False) - formats.extend(m3u8_fmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) - elif type_ == 'dash': - mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( - asset['manifestUrl'], asin, mpd_id=type_, fatal=False) - formats.extend(mpd_fmts) - subtitles = self._merge_subtitles(subtitles, mpd_subs) - else: - self.report_warning(f'Unknown asset type: {type_}') + for mpd in playback_info: + mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( + mpd['manifestURL'], asin, mpd_id=mpd['codec'], fatal=False) + formats.extend(mpd_fmts) + subtitles = self._merge_subtitles(subtitles, mpd_subs) - title_info = self._call_api( - asin, note='Downloading title info', data={ - 'operationName': 'content', - 'variables': {'contentId': asin}, - 'query': self._GRAPHQL_QUERY_CONTENT, - }) - credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) + credits_time = try_get(title_info, lambda x: x['skipData']['INTRO']['endTime']) is_episode = title_info.get('vodType') == 'EPISODE' return { @@ -171,22 +106,25 @@ def _real_extract(self, url): 'subtitles': subtitles, 'language': traverse_obj(title_info, ('audioTracks', 0)), 'thumbnails': [{ - 'id': type_, - 'url': url, - } for type_, url in (title_info.get('images') or {}).items()], - 'description': traverse_obj(title_info, ('description', 'synopsis')), - 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), + 'id': "thumbnailImage", + 'url': title_info.get('thumbnailImage'), + },{ + 'id':"seasonThumbnailImage", + 'url': title_info.get('thumbnailImage') + }], + 'description': traverse_obj(title_info_, ('description', 'synopsis')), + 'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)), 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), 'chapters': [{ 'start_time': credits_time, 'title': 'End Credits', }] if credits_time else [], - 'series': title_info.get('seriesName'), - 'series_id': title_info.get('seriesId'), - 'season_number': title_info.get('seasonNumber'), - 'season_id': title_info.get('seasonId'), + 'series': title_info_.get('seasonName') if is_episode else None, + 'series_id': title_info.get('seriesId') if is_episode else None, + 'season_number': title_info.get('seasonNumber') if is_episode else None, + 'season_id': title_info.get('seasonId') if is_episode else None, 'episode': title_info.get('name') if is_episode else None, - 'episode_number': title_info.get('episodeNumber'), + 'episode_number': title_info.get('episodeNumber') if is_episode else None, 'episode_id': asin if is_episode else None, } @@ -206,88 +144,17 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): 'only_matching': True, }] - _GRAPHQL_QUERY = ''' -query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { - getEpisodes( - applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId} - episodeOrSeasonId: $episodeOrSeasonId - ) { - episodes { - ... on Episode { - contentId - name - images - seriesName - seasonId - seriesId - seasonNumber - episodeNumber - description { - synopsis - contentLengthInSeconds - } - publicReleaseDateUTC - } - } - } -} -''' - def _entries(self, asin): season_info = self._call_api( - asin, note='Downloading season info', data={ - 'operationName': 'getEpisodes', - 'variables': {'episodeOrSeasonId': asin}, - 'query': self._GRAPHQL_QUERY, - }) + asin, note='Downloading season info', + data={'cursor': '8e0cefec-e190-46ba-854d-1f3ca7978b4a:::'} + ) - for episode in season_info['episodes']: - yield self.url_result( - f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) - - def _real_extract(self, url): - asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), asin) - - -class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): - IE_NAME = 'amazonminitv:series' - _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P[a-f0-9-]+)' - IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix' - _TESTS = [{ - 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', - 'playlist_mincount': 3, - 'info_dict': { - 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', - }, - }, { - 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', - 'only_matching': True, - }] - - _GRAPHQL_QUERY = ''' -query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { - getSeasons( - applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} - episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId - ) { - seasons { - seasonId - } - } -} -''' - - def _entries(self, asin): - season_info = self._call_api( - asin, note='Downloading series info', data={ - 'operationName': 'getSeasons', - 'variables': {'episodeOrSeasonOrSeriesId': asin}, - 'query': self._GRAPHQL_QUERY, - }) - - for season in season_info['seasons']: - yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) + for season in season_info['widgets'][0]['data']['options']: + if season['active']: + for episode in season['value']['data']['widgets'][0]['data']['widgets']: + yield self.url_result( + f'amazonminitv:{episode["data"]["contentId"]}', AmazonMiniTVIE, episode['data']['contentId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}'