From 6f258afb55795b82b93a30abbd39037eaaf1424d Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Fri, 10 Mar 2023 13:43:22 +0100 Subject: [PATCH 01/10] [extractor/echo360] Initial implementation with only hls format --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/echo360.py | 141 ++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 yt_dlp/extractor/echo360.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 01281b5a1..f804d458e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -511,6 +511,7 @@ from .dw import ( from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE +from .echo360 import Echo360IE from .echomsk import EchoMskIE from .egghead import ( EggheadCourseIE, diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py new file mode 100644 index 000000000..218261238 --- /dev/null +++ b/yt_dlp/extractor/echo360.py @@ -0,0 +1,141 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + traverse_obj, + variadic, +) + + +class Echo360BaseIE(InfoExtractor): + _INSTANCES_RE = r'''(?: + echo360\.ca| + echo360\.net\.au| + echo360\.org\.au| + echo360\.org\.uk| + echo360\.org| + )''' + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + + def _call_api(self, host, video_id, media_id, session_token, **kwargs): + return self._download_json( + self._API_BASE % (host, video_id, media_id), + video_id, + headers={'Authorization': f'Bearer {session_token}'}, + **kwargs, + ) + + @staticmethod + def _update_url_query(uri, query_string): + if query_string is not None: + return f'{uri.split("?", 1)[0]}?{query_string}' + return uri + + @staticmethod + def _get_query_string(uri, query_strings): + uri_base = uri.split("?", 1)[0] + for query_string in query_strings: + if re.match(query_string['uriPattern'], 
uri_base): + return query_string['queryString'] + return None + + def _parse_mediapackage(self, video): + video_id = traverse_obj(video, ('playableAudioVideo', 'mediaId')) + if video_id is None: + raise ExtractorError('Video id was not found') + query_strings = variadic(traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or []) + duration = float(re.match(r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')))[1]) + + formats = [] + for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []): + href = track.get('uri') + if href is None: + continue + href = self._update_url_query(href, self._get_query_string(href, query_strings)) + ext = determine_ext(href, None) + is_hls = track.get('isHls') + is_live = track.get('isLive') + + if is_hls or ext == 'm3u8': + hls_formats = self._extract_m3u8_formats( + href, video_id, live=is_live, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False + ) + + for hls_format in hls_formats: + query_string = self._get_query_string(hls_format['url'], query_strings) + if query_string is not None: + hls_format['extra_param_to_segment_url'] = query_string + hls_format['url'] = self._update_url_query(hls_format['url'], query_string) + + formats.extend(hls_formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': video.get('mediaName'), + 'duration': duration, + } + + +class Echo360IE(Echo360BaseIE): + _VALID_URL = rf'''(?x) + https?://(?P<host>{Echo360BaseIE._INSTANCES_RE}) + /media/(?P<id>{Echo360BaseIE._UUID_RE})/public''' + + _API_BASE = 'https://%s/api/ui/echoplayer/public-links/%s/media/%s/player-properties' + + _TESTS = [ + { + 'url': 'https://echo360.org.uk/media/1d8392aa-a3e7-4e78-94cf-b6532c27208c/public', + 'info_dict': { + 'id': '3c7ae6e0-fa19-432d-aa21-c283b4276f2a', + 'ext': 'mp4', + 'title': '3-4 Force + moment + mechanics.mp4', + 'duration': 4731.888, + }, + 'params': {'skip_download': 'm3u8'} + }, + { + 'url': 
'https://echo360.net.au/media/f04960a9-2efc-4b63-87b5-72e629081d15/public', + 'info_dict': { + 'id': '6098a147-2d65-40f3-b9e9-a0204afe450c', + 'ext': 'mp4', + 'title': 'EXSC634_Online_Workshop_Week_4.mp4', + 'duration': 6659.72, + }, + 'params': {'skip_download': 'm3u8'} + }, + ] + + def _real_extract(self, url): + host, video_id = self._match_valid_url(url).group('host', 'id') + webpage = self._download_webpage(url, video_id) + + media_id = self._search_regex(rf'\\"mediaId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, 'media id') + session_id = self._search_regex(rf'\\"sessionId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, 'session id') + + share_link_id = self._search_regex( + rf'\\"shareLinkId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, + 'share link id', default=None, fatal=False) + + public_link_id = self._search_regex( + rf'\\"publicLinkId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, + 'public link id', default=None, fatal=False) + + real_video_id = share_link_id or public_link_id + if real_video_id is None: + raise ExtractorError('Video id was not found') + + urlh = self._request_webpage( + f'https://{host}/api/ui/sessions/{session_id}', + video_id, + note='Open video session', + errnote='Unable to open video session', + ) + session_token = urlh.headers.get('Token') + if session_token is None: + raise ExtractorError('Video session could not be opened') + + return self._parse_mediapackage(self._call_api(host, real_video_id, media_id, session_token)['data']) From a2e9c5ba97e3c6e4049176ba6f79899b7a0dcebf Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Sat, 11 Mar 2023 14:05:16 +0100 Subject: [PATCH 02/10] [extractor/echo360] little code cleanup; do not fail if video duration is not found --- yt_dlp/extractor/echo360.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 218261238..19ca27f0a 100644 --- a/yt_dlp/extractor/echo360.py +++ 
b/yt_dlp/extractor/echo360.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, + float_or_none, traverse_obj, variadic, ) @@ -21,11 +22,8 @@ class Echo360BaseIE(InfoExtractor): def _call_api(self, host, video_id, media_id, session_token, **kwargs): return self._download_json( - self._API_BASE % (host, video_id, media_id), - video_id, - headers={'Authorization': f'Bearer {session_token}'}, - **kwargs, - ) + self._API_BASE % (host, video_id, media_id), video_id, + headers={'Authorization': f'Bearer {session_token}'}, **kwargs) @staticmethod def _update_url_query(uri, query_string): @@ -45,8 +43,10 @@ class Echo360BaseIE(InfoExtractor): video_id = traverse_obj(video, ('playableAudioVideo', 'mediaId')) if video_id is None: raise ExtractorError('Video id was not found') - query_strings = variadic(traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or []) - duration = float(re.match(r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')))[1]) + query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or [] + duration = float_or_none(self._search_regex( + r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), + 'video duration', default=None, fatal=False)) formats = [] for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []): @@ -54,20 +54,15 @@ class Echo360BaseIE(InfoExtractor): if href is None: continue href = self._update_url_query(href, self._get_query_string(href, query_strings)) - ext = determine_ext(href, None) - is_hls = track.get('isHls') - is_live = track.get('isLive') - - if is_hls or ext == 'm3u8': + if track.get('isHls') or determine_ext(href, None) == 'm3u8': hls_formats = self._extract_m3u8_formats( - href, video_id, live=is_live, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False + href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False ) for hls_format 
in hls_formats: query_string = self._get_query_string(hls_format['url'], query_strings) - if query_string is not None: - hls_format['extra_param_to_segment_url'] = query_string - hls_format['url'] = self._update_url_query(hls_format['url'], query_string) + hls_format['extra_param_to_segment_url'] = query_string + hls_format['url'] = self._update_url_query(hls_format['url'], query_string) formats.extend(hls_formats) From 577df3dbf10d18b73a758b9241082b735d57e17a Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Sun, 12 Mar 2023 00:10:24 +0100 Subject: [PATCH 03/10] [extractor/echo360] read in mediaPlayerBootstrapApp json instead of searching for single elements --- yt_dlp/extractor/echo360.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 19ca27f0a..2641ab967 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -108,29 +108,22 @@ class Echo360IE(Echo360BaseIE): host, video_id = self._match_valid_url(url).group('host', 'id') webpage = self._download_webpage(url, video_id) - media_id = self._search_regex(rf'\\"mediaId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, 'media id') - session_id = self._search_regex(rf'\\"sessionId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, 'session id') + player_config = self._parse_json(self._search_regex( + r'Echo\["mediaPlayerBootstrapApp"\]\("({[^}]*})"\);', webpage, 'player config').replace('\\"', "\""), + video_id) - share_link_id = self._search_regex( - rf'\\"shareLinkId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, - 'share link id', default=None, fatal=False) - - public_link_id = self._search_regex( - rf'\\"publicLinkId\\":\\"({Echo360BaseIE._UUID_RE})\\"', webpage, - 'public link id', default=None, fatal=False) - - real_video_id = share_link_id or public_link_id + real_video_id = player_config.get('shareLinkId') or player_config.get('publicLinkId') if real_video_id is None: raise ExtractorError('Video 
id was not found') urlh = self._request_webpage( - f'https://{host}/api/ui/sessions/{session_id}', + f'https://{host}/api/ui/sessions/{player_config["sessionId"]}', video_id, note='Open video session', errnote='Unable to open video session', ) session_token = urlh.headers.get('Token') if session_token is None: - raise ExtractorError('Video session could not be opened') + raise ExtractorError('No session token received') - return self._parse_mediapackage(self._call_api(host, real_video_id, media_id, session_token)['data']) + return self._parse_mediapackage(self._call_api(host, real_video_id, player_config['mediaId'], session_token)['data']) From 9e2f1ecc3832bb329e5f8d68314566a21af7db2d Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Sun, 12 Mar 2023 20:38:21 +0100 Subject: [PATCH 04/10] [extractor/echo360] Apply most suggestions by @HobbyistDev --- yt_dlp/extractor/echo360.py | 124 ++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 2641ab967..5be6d0f86 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -3,14 +3,13 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, - ExtractorError, float_or_none, traverse_obj, variadic, ) -class Echo360BaseIE(InfoExtractor): +class Echo360IE(InfoExtractor): _INSTANCES_RE = r'''(?: echo360\.ca| echo360\.net\.au| @@ -19,65 +18,9 @@ class Echo360BaseIE(InfoExtractor): echo360\.org| )''' _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' - - def _call_api(self, host, video_id, media_id, session_token, **kwargs): - return self._download_json( - self._API_BASE % (host, video_id, media_id), video_id, - headers={'Authorization': f'Bearer {session_token}'}, **kwargs) - - @staticmethod - def _update_url_query(uri, query_string): - if query_string is not None: - return f'{uri.split("?", 1)[0]}?{query_string}' - return uri - - @staticmethod - 
def _get_query_string(uri, query_strings): - uri_base = uri.split("?", 1)[0] - for query_string in query_strings: - if re.match(query_string['uriPattern'], uri_base): - return query_string['queryString'] - return None - - def _parse_mediapackage(self, video): - video_id = traverse_obj(video, ('playableAudioVideo', 'mediaId')) - if video_id is None: - raise ExtractorError('Video id was not found') - query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or [] - duration = float_or_none(self._search_regex( - r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), - 'video duration', default=None, fatal=False)) - - formats = [] - for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []): - href = track.get('uri') - if href is None: - continue - href = self._update_url_query(href, self._get_query_string(href, query_strings)) - if track.get('isHls') or determine_ext(href, None) == 'm3u8': - hls_formats = self._extract_m3u8_formats( - href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False - ) - - for hls_format in hls_formats: - query_string = self._get_query_string(hls_format['url'], query_strings) - hls_format['extra_param_to_segment_url'] = query_string - hls_format['url'] = self._update_url_query(hls_format['url'], query_string) - - formats.extend(hls_formats) - - return { - 'id': video_id, - 'formats': formats, - 'title': video.get('mediaName'), - 'duration': duration, - } - - -class Echo360IE(Echo360BaseIE): _VALID_URL = rf'''(?x) - https?://(?P<host>{Echo360BaseIE._INSTANCES_RE}) - /media/(?P<id>{Echo360BaseIE._UUID_RE})/public''' + https?://(?P<host>{_INSTANCES_RE}) + /media/(?P<id>{_UUID_RE})/public''' _API_BASE = 'https://%s/api/ui/echoplayer/public-links/%s/media/%s/player-properties' @@ -104,6 +47,56 @@ class Echo360IE(Echo360BaseIE): }, ] + def _call_api(self, host, video_id, media_id, session_token, **kwargs): + return self._download_json( + self._API_BASE % 
(host, video_id, media_id), video_id, + headers={'Authorization': f'Bearer {session_token}'}, **kwargs) + + @staticmethod + def _update_url_query(uri, query_string): + if query_string is not None: + return f'{uri.split("?", 1)[0]}?{query_string}' + return uri + + @staticmethod + def _get_query_string(uri, query_strings): + uri_base = uri.split("?", 1)[0] + for query_string in query_strings: + if re.match(query_string['uriPattern'], uri_base): + return query_string['queryString'] + return None + + def _parse_mediapackage(self, video): + video_id = video['playableAudioVideo']['mediaId'] + query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or [] + + formats = [] + for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []): + href = track.get('uri') + if href is None: + continue + href = self._update_url_query(href, self._get_query_string(href, query_strings)) + if track.get('isHls') or determine_ext(href, None) == 'm3u8': + hls_formats = self._extract_m3u8_formats( + href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False + ) + + for hls_format in hls_formats: + query_string = self._get_query_string(hls_format['url'], query_strings) + hls_format['extra_param_to_segment_url'] = query_string + hls_format['url'] = self._update_url_query(hls_format['url'], query_string) + + formats.extend(hls_formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': video.get('mediaName'), + 'duration': float_or_none(self._search_regex( + r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), + 'video duration', default=None, fatal=False)), + } + def _real_extract(self, url): host, video_id = self._match_valid_url(url).group('host', 'id') webpage = self._download_webpage(url, video_id) @@ -112,18 +105,13 @@ class Echo360IE(Echo360BaseIE): r'Echo\["mediaPlayerBootstrapApp"\]\("({[^}]*})"\);', webpage, 'player config').replace('\\"', "\""), video_id) - 
real_video_id = player_config.get('shareLinkId') or player_config.get('publicLinkId') - if real_video_id is None: - raise ExtractorError('Video id was not found') - urlh = self._request_webpage( f'https://{host}/api/ui/sessions/{player_config["sessionId"]}', video_id, note='Open video session', errnote='Unable to open video session', ) - session_token = urlh.headers.get('Token') - if session_token is None: - raise ExtractorError('No session token received') - return self._parse_mediapackage(self._call_api(host, real_video_id, player_config['mediaId'], session_token)['data']) + return self._parse_mediapackage(self._call_api( + host, player_config.get('shareLinkId') or player_config['publicLinkId'], player_config['mediaId'], + urlh.headers['Token'])['data']) From 32fc35d305c0352e1db33ef23f7a3a72690af253 Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Sun, 12 Mar 2023 21:38:37 +0100 Subject: [PATCH 05/10] [extractor/echo360] use _search_json instead of _parse_json as recommended by @pukkandan --- yt_dlp/extractor/echo360.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 5be6d0f86..de5c3e573 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -101,9 +101,8 @@ class Echo360IE(InfoExtractor): host, video_id = self._match_valid_url(url).group('host', 'id') webpage = self._download_webpage(url, video_id) - player_config = self._parse_json(self._search_regex( - r'Echo\["mediaPlayerBootstrapApp"\]\("({[^}]*})"\);', webpage, 'player config').replace('\\"', "\""), - video_id) + player_config = self._search_json(r'Echo\["mediaPlayerBootstrapApp"\]\("', webpage, 'player config', video_id, + transform_source=lambda x: x.replace(R'\"', '"')) urlh = self._request_webpage( f'https://{host}/api/ui/sessions/{player_config["sessionId"]}', From 8c5254541089bc96101e82f23765e1c5ce6fdb94 Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Mon, 13 Mar 2023 09:03:13 +0100 Subject: [PATCH 
06/10] [extractor/echo360] Respect soft line length limit --- yt_dlp/extractor/echo360.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index de5c3e573..ee853c682 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -78,7 +78,8 @@ class Echo360IE(InfoExtractor): href = self._update_url_query(href, self._get_query_string(href, query_strings)) if track.get('isHls') or determine_ext(href, None) == 'm3u8': hls_formats = self._extract_m3u8_formats( - href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False + href, video_id, live=track.get('isLive'), m3u8_id='hls', + entry_protocol='m3u8_native', fatal=False ) for hls_format in hls_formats: @@ -101,7 +102,8 @@ class Echo360IE(InfoExtractor): host, video_id = self._match_valid_url(url).group('host', 'id') webpage = self._download_webpage(url, video_id) - player_config = self._search_json(r'Echo\["mediaPlayerBootstrapApp"\]\("', webpage, 'player config', video_id, + player_config = self._search_json(r'Echo\["mediaPlayerBootstrapApp"\]\("', webpage, + 'player config', video_id, transform_source=lambda x: x.replace(R'\"', '"')) urlh = self._request_webpage( @@ -112,5 +114,5 @@ class Echo360IE(InfoExtractor): ) return self._parse_mediapackage(self._call_api( - host, player_config.get('shareLinkId') or player_config['publicLinkId'], player_config['mediaId'], - urlh.headers['Token'])['data']) + host, player_config.get('shareLinkId') or player_config['publicLinkId'], + player_config['mediaId'], urlh.headers['Token'])['data']) From 5ca5b56cfc9cff1afae36ae9d68496d541c06d34 Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Wed, 15 Mar 2023 11:31:13 +0100 Subject: [PATCH 07/10] [extractor/echo360] Implement all suggestions --- yt_dlp/extractor/echo360.py | 64 ++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git 
a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index ee853c682..8f95b61a4 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -1,28 +1,20 @@ import re +from urllib.parse import urlparse, urlunparse + from .common import InfoExtractor from ..utils import ( determine_ext, float_or_none, traverse_obj, - variadic, ) class Echo360IE(InfoExtractor): - _INSTANCES_RE = r'''(?: - echo360\.ca| - echo360\.net\.au| - echo360\.org\.au| - echo360\.org\.uk| - echo360\.org| - )''' - _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' - _VALID_URL = rf'''(?x) - https?://(?P<host>{_INSTANCES_RE}) - /media/(?P<id>{_UUID_RE})/public''' - - _API_BASE = 'https://%s/api/ui/echoplayer/public-links/%s/media/%s/player-properties' + _VALID_URL = r'''(?x) + https?://(?P<host>echo360\.(?:ca|net\.au|org|org\.au|org\.uk))/ + media/(?P<id>[\da-fA-F]{8}-(?:[\da-fA-F]{4}-){3}[\da-fA-F]{12})/public + ''' _TESTS = [ { @@ -49,21 +41,22 @@ class Echo360IE(InfoExtractor): def _call_api(self, host, video_id, media_id, session_token, **kwargs): return self._download_json( - self._API_BASE % (host, video_id, media_id), video_id, - headers={'Authorization': f'Bearer {session_token}'}, **kwargs) + f'https://{host}/api/ui/echoplayer/public-links/{video_id}/media/{media_id}/player-properties', + video_id, headers={'Authorization': f'Bearer {session_token}'}, **kwargs) - @staticmethod - def _update_url_query(uri, query_string): + def _replace_url_query(self, url, query_string): if query_string is not None: - return f'{uri.split("?", 1)[0]}?{query_string}' - return uri + return urlunparse(urlparse(url)._replace(query=query_string)) + return url - @staticmethod - def _get_query_string(uri, query_strings): - uri_base = uri.split("?", 1)[0] + def _get_query_string(self, uri, query_strings): + uri_base = urlparse(uri)._replace(query='', fragment='').geturl() for query_string in query_strings: - if re.match(query_string['uriPattern'], uri_base): - return 
query_string['queryString'] + try: + if re.match(query_string['uriPattern'], uri_base): + return query_string['queryString'] + except re.error as re_error: + self.report_warning(f'Error in query string pattern `{re_error.pattern}`: {re_error.msg}') return None def _parse_mediapackage(self, video): @@ -71,11 +64,11 @@ class Echo360IE(InfoExtractor): query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or [] formats = [] - for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []): + for track in traverse_obj(video, ('playableAudioVideo', 'playableMedias', ...)): href = track.get('uri') if href is None: continue - href = self._update_url_query(href, self._get_query_string(href, query_strings)) + href = self._replace_url_query(href, self._get_query_string(href, query_strings)) if track.get('isHls') or determine_ext(href, None) == 'm3u8': hls_formats = self._extract_m3u8_formats( href, video_id, live=track.get('isLive'), m3u8_id='hls', @@ -85,7 +78,7 @@ class Echo360IE(InfoExtractor): for hls_format in hls_formats: query_string = self._get_query_string(hls_format['url'], query_strings) hls_format['extra_param_to_segment_url'] = query_string - hls_format['url'] = self._update_url_query(hls_format['url'], query_string) + hls_format['url'] = self._replace_url_query(hls_format['url'], query_string) formats.extend(hls_formats) @@ -94,24 +87,21 @@ class Echo360IE(InfoExtractor): 'formats': formats, 'title': video.get('mediaName'), 'duration': float_or_none(self._search_regex( - r'PT(\d+\.?\d+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), - 'video duration', default=None, fatal=False)), + r'PT([\d.]+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), + 'video duration', fatal=False)), } def _real_extract(self, url): host, video_id = self._match_valid_url(url).group('host', 'id') webpage = self._download_webpage(url, video_id) - player_config = 
self._search_json(r'Echo\["mediaPlayerBootstrapApp"\]\("', webpage, - 'player config', video_id, - transform_source=lambda x: x.replace(R'\"', '"')) + player_config = self._search_json( + r'Echo\["mediaPlayerBootstrapApp"\]\("', webpage, 'player config', + video_id, transform_source=lambda x: x.replace(R'\"', '"')) urlh = self._request_webpage( f'https://{host}/api/ui/sessions/{player_config["sessionId"]}', - video_id, - note='Open video session', - errnote='Unable to open video session', - ) + video_id, 'Open video session', 'Unable to open video session') return self._parse_mediapackage(self._call_api( host, player_config.get('shareLinkId') or player_config['publicLinkId'], From 333eb58c7749f29b9bbc8f8cf2e5d291bc836f2e Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Thu, 18 Apr 2024 18:55:00 +0200 Subject: [PATCH 08/10] use update_url instead of urllib.parse --- yt_dlp/extractor/echo360.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 8f95b61a4..893c5cfb9 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -1,13 +1,7 @@ import re -from urllib.parse import urlparse, urlunparse - from .common import InfoExtractor -from ..utils import ( - determine_ext, - float_or_none, - traverse_obj, -) +from ..utils import determine_ext, float_or_none, traverse_obj, update_url class Echo360IE(InfoExtractor): @@ -44,13 +38,8 @@ class Echo360IE(InfoExtractor): f'https://{host}/api/ui/echoplayer/public-links/{video_id}/media/{media_id}/player-properties', video_id, headers={'Authorization': f'Bearer {session_token}'}, **kwargs) - def _replace_url_query(self, url, query_string): - if query_string is not None: - return urlunparse(urlparse(url)._replace(query=query_string)) - return url - def _get_query_string(self, uri, query_strings): - uri_base = urlparse(uri)._replace(query='', fragment='').geturl() + uri_base = update_url(uri, query=None, fragment=None) 
for query_string in query_strings: try: if re.match(query_string['uriPattern'], uri_base): @@ -68,7 +57,7 @@ class Echo360IE(InfoExtractor): href = track.get('uri') if href is None: continue - href = self._replace_url_query(href, self._get_query_string(href, query_strings)) + href = update_url(href, query=self._get_query_string(href, query_strings)) if track.get('isHls') or determine_ext(href, None) == 'm3u8': hls_formats = self._extract_m3u8_formats( href, video_id, live=track.get('isLive'), m3u8_id='hls', @@ -78,7 +67,7 @@ class Echo360IE(InfoExtractor): for hls_format in hls_formats: query_string = self._get_query_string(hls_format['url'], query_strings) hls_format['extra_param_to_segment_url'] = query_string - hls_format['url'] = self._replace_url_query(hls_format['url'], query_string) + hls_format['url'] = update_url(hls_format['url'], query=query_string) formats.extend(hls_formats) From 333e92b1e5a07fd1000ad863db7e4f155474ad8e Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Thu, 18 Apr 2024 19:02:59 +0200 Subject: [PATCH 09/10] [extractor/echo360] use traverse_obj with lambda and parse_duration --- yt_dlp/extractor/echo360.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 893c5cfb9..40d8c1ed6 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -1,7 +1,13 @@ import re from .common import InfoExtractor -from ..utils import determine_ext, float_or_none, traverse_obj, update_url +from ..utils import ( + determine_ext, + float_or_none, + parse_duration, + traverse_obj, + update_url, +) class Echo360IE(InfoExtractor): @@ -50,19 +56,14 @@ class Echo360IE(InfoExtractor): def _parse_mediapackage(self, video): video_id = video['playableAudioVideo']['mediaId'] - query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or [] + query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings', ...)) formats = [] 
- for track in traverse_obj(video, ('playableAudioVideo', 'playableMedias', ...)): - href = track.get('uri') - if href is None: - continue - href = update_url(href, query=self._get_query_string(href, query_strings)) - if track.get('isHls') or determine_ext(href, None) == 'm3u8': + for track in traverse_obj(video, ('playableAudioVideo', 'playableMedias', lambda _, v: v['uri'])): + href = update_url(track['uri'], query=self._get_query_string(track['uri'], query_strings)) + if track.get('isHls') or determine_ext(href) == 'm3u8': hls_formats = self._extract_m3u8_formats( - href, video_id, live=track.get('isLive'), m3u8_id='hls', - entry_protocol='m3u8_native', fatal=False - ) + href, video_id, live=track.get('isLive'), m3u8_id='hls', fatal=False) for hls_format in hls_formats: query_string = self._get_query_string(hls_format['url'], query_strings) @@ -75,9 +76,7 @@ class Echo360IE(InfoExtractor): 'id': video_id, 'formats': formats, 'title': video.get('mediaName'), - 'duration': float_or_none(self._search_regex( - r'PT([\d.]+)S', traverse_obj(video, ('playableAudioVideo', 'duration')), - 'video duration', fatal=False)), + 'duration': traverse_obj(video, ('playableAudioVideo', 'duration', {parse_duration})), } def _real_extract(self, url): From 38741ad6e5f064330b5bc43f3df4f44bc129eb7b Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Thu, 18 Apr 2024 19:08:08 +0200 Subject: [PATCH 10/10] [extractor/echo360] remove unused import --- yt_dlp/extractor/echo360.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/yt_dlp/extractor/echo360.py b/yt_dlp/extractor/echo360.py index 40d8c1ed6..6dc0a6919 100644 --- a/yt_dlp/extractor/echo360.py +++ b/yt_dlp/extractor/echo360.py @@ -1,13 +1,7 @@ import re from .common import InfoExtractor -from ..utils import ( - determine_ext, - float_or_none, - parse_duration, - traverse_obj, - update_url, -) +from ..utils import determine_ext, parse_duration, traverse_obj, update_url class Echo360IE(InfoExtractor):