from ..utils import (
    ExtractorError,
    traverse_obj,
    unified_strdate,
    url_or_none,
)
from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_unquote,
    compat_str
)


class MediaKlikkIE(InfoExtractor):
    """Extractor for video/article pages on mediaklikk.hu, m4sport.hu,
    hirado.hu and petofilive.hu.

    The page URL may embed the publication date as ``/YYYY/M/D/`` right
    before the slug; the slug itself is captured as ``id`` and is used as
    the display id.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
                        (?P<id>[^/#?_]+)'''

    # NOTE(review): every 'url' and 'thumbnail' value below is an empty
    # string in this copy of the file; they look redacted and need to be
    # restored before these test cases can run.
    _TESTS = [{
        # (old) mediaklikk. date in html.
        'url': '',
        'info_dict': {
            'id': '4754129',
            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': ''
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # mediaklikk. date in html.
        'url': '',
        'info_dict': {
            'id': '6696133',
            'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
            'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
            'ext': 'mp4',
            'upload_date': '20230903',
            'thumbnail': ''
        }
    }, {
        # (old) m4sport
        'url': '',
        'info_dict': {
            'id': '4754999',
            'title': 'Gyémánt Liga, Párizs',
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': ''
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # m4sport
        'url': '',
        'info_dict': {
            'id': '6711136',
            'title': 'Atlétika – Gyémánt Liga, Brüsszel',
            'display_id': 'atletika-gyemant-liga-brusszel',
            'ext': 'mp4',
            'upload_date': '20230908',
            'thumbnail': ''
        }
    }, {
        # m4sport with *video/ url and no date
        'url': '',
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
            'display_id': 'real-madrid-chelsea-1-1',
            'ext': 'mp4',
            'thumbnail': ''
        }
    }, {
        # (old) hirado
        'url': '',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': ''
        },
        'skip': 'Webpage redirects to video list page',
    }, {
        # hirado
        'url': '',
        'info_dict': {
            'id': '6716068',
            'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
            'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
            'ext': 'mp4',
            'upload_date': '20230911',
            'thumbnail': ''
        }
    }, {
        # (old) petofilive
        'url': '',
        'info_dict': {
            'id': '4571948',
            'title': 'Tha Shudras az Akusztikban',
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': ''
        },
        'skip': 'Webpage redirects to empty page',
    }, {
        # petofilive
        'url': '',
        'info_dict': {
            'id': '6713233',
            'title': 'Futball Fesztivál a Margitszigeten',
            'display_id': 'futball-fesztival-a-margitszigeten',
            'ext': 'mp4',
            'upload_date': '20230909',
            'thumbnail': ''
        }
    }]

    def _real_extract(self, url):
        """Extract the info dict for a single page URL.

        Steps: download the page, read the JSON argument passed to
        ``mtva_player_manager.player(...)``, request the player page with
        that data as the query, pick the first ``hls`` playlist entry from
        the ``pl.setup(...)`` JSON and hand it to
        ``_extract_wowza_formats``.

        Raises ExtractorError when no HLS playlist URL can be found.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        player_data_str = self._html_search_regex(
            r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);',
            webpage, 'player data')
        # compat_urllib_parse_unquote is handed to _parse_json as its third
        # positional argument -- presumably a source transform applied
        # before JSON decoding; confirm against InfoExtractor._parse_json.
        player_data = self._parse_json(
            player_data_str, display_id, compat_urllib_parse_unquote)
        video_id = compat_str(player_data['contentId'])

        # Title sources, in order of preference: player data, OpenGraph
        # metadata (non-fatal), then the article heading (fatal if absent).
        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
            self._html_search_regex(
                r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')

        # The date part of the URL is a single optional group, so either
        # all three components matched or none did. Only build a date
        # string when they are present instead of formatting None values.
        year, month, day = mobj.group('year', 'month', 'day')
        upload_date = None
        if year:
            upload_date = unified_strdate('%s-%s-%s' % (year, month, day))
        if not upload_date:
            # Fall back to the date shown in the article markup.
            upload_date = unified_strdate(self._html_search_regex(
                r'<p+\b[^>]+\bclass="article_date">([^<]+)<',
                webpage, 'upload date', default=None))

        # The player page receives the token under the 'video' key.
        player_data['video'] = player_data.pop('token')
        # NOTE(review): the player endpoint URL is an empty string in this
        # copy of the file (looks redacted); it must be restored for this
        # request to work.
        player_page = self._download_webpage('', video_id, query=player_data)
        player_json = self._search_json(
            r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
        playlist_url = traverse_obj(
            player_json,
            ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}),
            get_all=False)
        if not playlist_url:
            raise ExtractorError('Unable to extract playlist url')

        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])

        return {
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'formats': formats,
            'upload_date': upload_date,
            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
        }