From 6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5 Mon Sep 17 00:00:00 2001 From: zhallgato Date: Sat, 16 Sep 2023 12:12:18 +0200 Subject: [PATCH] [ie/mediaklikk] Fix extractor (#8086) Fixes https://github.com/yt-dlp/yt-dlp/issues/8053 Authored by: bashonly, zhallgato --- yt_dlp/extractor/mediaklikk.py | 72 ++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index 46365081b..fcc4827b5 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,5 +1,8 @@ from ..utils import ( - unified_strdate + ExtractorError, + traverse_obj, + unified_strdate, + url_or_none, ) from .common import InfoExtractor from ..compat import ( @@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor): (?P<id>[^/#?_]+)''' _TESTS = [{ - # mediaklikk. date in html. + # (old) mediaklikk. date in html. 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/', 'info_dict': { 'id': '4754129', @@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210901', 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' + }, + 'skip': 'Webpage redirects to 404 page', + }, { + # mediaklikk. date in html. 
+ 'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/', + 'info_dict': { + 'id': '6696133', + 'title': 'Hazajáró, Fabova-hegység - Kishont koronája', + 'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja', + 'ext': 'mp4', + 'upload_date': '20230903', + 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' } }, { - # m4sport + # (old) m4sport 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', 'info_dict': { 'id': '4754999', @@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210830', 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg' + }, + 'skip': 'Webpage redirects to 404 page', + }, { + # m4sport + 'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/', + 'info_dict': { + 'id': '6711136', + 'title': 'Atlétika – Gyémánt Liga, Brüsszel', + 'display_id': 'atletika-gyemant-liga-brusszel', + 'ext': 'mp4', + 'upload_date': '20230908', + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg' } }, { # m4sport with *video/ url and no date @@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor): 'info_dict': { 'id': '4492099', 'title': 'Real Madrid - Chelsea 1-1', + 'display_id': 'real-madrid-chelsea-1-1', 'ext': 'mp4', - 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' } }, { - # hirado + # (old) hirado 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', 'info_dict': { 'id': '4760120', 'title': 'Feltételeket szabott a főváros', 'ext': 'mp4', 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg' + }, + 'skip': 'Webpage redirects to video list page', + }, { + # hirado + 'url': 
'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal', + 'info_dict': { + 'id': '6716068', + 'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál', + 'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal', + 'ext': 'mp4', + 'upload_date': '20230911', + 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg' } }, { - # petofilive + # (old) petofilive 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', 'info_dict': { 'id': '4571948', @@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210607', 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg' + }, + 'skip': 'Webpage redirects to empty page', + }, { + # petofilive + 'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/', + 'info_dict': { + 'id': '6713233', + 'title': 'Futball Fesztivál a Margitszigeten', + 'display_id': 'futball-fesztival-a-margitszigeten', + 'ext': 'mp4', + 'upload_date': '20230909', + 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg' } }] @@ -84,8 +136,12 @@ def _real_extract(self, url): player_data['video'] = player_data.pop('token') player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) - playlist_url = self._proto_relative_url(compat_urllib_parse_unquote( - self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/')) + player_json = self._search_json( + r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);') + playlist_url = traverse_obj( + player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False) + if not playlist_url: + raise ExtractorError('Unable to extract playlist url') formats = 
self._extract_wowza_formats( playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])