From 0a08a50f10ebe989023904b444ebf82847dbca12 Mon Sep 17 00:00:00 2001
From: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Date: Wed, 4 Sep 2024 17:18:19 +0000
Subject: [PATCH] [ie/matchtv:video] Add extractor

Add two extractors to yt_dlp/extractor/matchtv.py and register them in
_extractors.py:

* MatchTVVideoIE handles ".../(quote|media)/start/free_<id>" URLs. It
  downloads the XML document at that URL, takes the title from
  <event_name> and the thumbnail from <image>, and collects HLS formats
  from the iphone/track and iphone/track_noise entries; "noise" tracks
  get source_preference 0 and the others 1.
* MatchTVFeedIE handles ".../feed/start/free_<id>" URLs. It downloads
  the XML feed and hands the <video_hd> (or <video>) URL to url_result.
  It also finds players embedded in other pages through _EMBED_REGEX and
  through JSON-LD VideoObject entries whose "url" carries a src attribute.

The _VALID_URL patterns must expose a named group "id" (read by
_match_id) and _EMBED_REGEX a named group "url".
---
 yt_dlp/extractor/_extractors.py |  6 +-
 yt_dlp/extractor/matchtv.py     | 99 +++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index e7b162512f..3d167d86d6 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1086,7 +1086,11 @@
 )
 from .massengeschmacktv import MassengeschmackTVIE
 from .masters import MastersIE
-from .matchtv import MatchTVIE
+from .matchtv import (
+    MatchTVFeedIE,
+    MatchTVIE,
+    MatchTVVideoIE,
+)
 from .mbn import MBNIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py
index 93799fe859..045164a995 100644
--- a/yt_dlp/extractor/matchtv.py
+++ b/yt_dlp/extractor/matchtv.py
@@ -1,4 +1,11 @@
 from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    join_nonempty,
+    xpath_text,
+)
+from ..utils.traversal import traverse_obj
 
 
 class MatchTVIE(InfoExtractor):
@@ -33,3 +40,95 @@ def _real_extract(self, url):
             'is_live': True,
             'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
         }
+
+
+# WebcasterIE
+class MatchTVVideoIE(InfoExtractor):
+    _GEO_COUNTRIES = ['RU']
+    _VALID_URL = r'https?://[.\w-]+/(?:quote|media)/start/free_(?P<id>[^/]+)'
+    _TESTS = []
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_xml(url, video_id)
+
+        title = xpath_text(video, './/event_name', 'event name', fatal=True)
+
+        formats = []
+        for format_id in (None, 'noise'):
+            track_tag = join_nonempty('track', format_id, delim='_')
+            for track in video.findall(f'.//iphone/{track_tag}'):
+                track_url = track.text
+                if not track_url:
+                    continue
+                if determine_ext(track_url) == 'm3u8':
+                    m3u8_formats = self._extract_m3u8_formats(
+                        track_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native',
+                        m3u8_id=join_nonempty('hls', format_id, delim='-'), fatal=False)
+                    for f in m3u8_formats:
+                        f.update({
+                            'source_preference': 0 if format_id == 'noise' else 1,
+                            'format_note': track.get('title'),
+                        })
+                    formats.extend(m3u8_formats)
+
+        thumbnail = xpath_text(video, './/image', 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
+# WebcasterFeedIE
+class MatchTVFeedIE(InfoExtractor):
+    _GEO_COUNTRIES = ['RU']
+    _VALID_URL = r'https?://[.\w-]+/feed/start/free_(?P<id>[^/]+)'
+    _EMBED_REGEX = [r'<(?:object|a|span[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://(?:(?!\1).)+)\1']
+    _TESTS = []
+    _WEBPAGE_TESTS = [{
+        'url': 'https://matchtv.ru/football/matchtvvideo_NI1593368_clip_Zolotoj_dubl_Cherchesova_Specialnyj_reportazh',
+        'info_dict': {
+            'id': '675ea0e4b4b1d54d21f9b52db6624199',
+            'ext': 'mp4',
+            'title': '«Золотой дубль Черчесова». Специальный репортаж',
+            'thumbnail': r're:https?://[\w-]+.video.matchtv.ru/fc/[\w-]+/thumbnails/events/920749/135154185.jpg',
+        },
+    }, {
+        'url': 'https://matchtv.ru/football/rossija/kubok_rossii/matchtvvideo_NI2100168_translation_FONBET_Kubok_Rossii_Tekstilshhik___Spartak_Kostroma',
+        'info_dict': {
+            'id': 'b6570efa80dc28df18523237d3f14a5b',
+            'ext': 'mp4',
+            'title': 'FONBET Кубок России по футболу сезона 2024 - 2025 гг. Текстильщик - Спартак Кострома',
+            'thumbnail': r're:https?://[\w-]+.video.matchtv.ru/fc/[\w-]+/thumbnails/events/1202122/1039728778.jpg',
+        },
+    }, {
+        'url': 'https://matchtv.ru/biathlon/matchtvvideo_NI1938496_translation_Letnij_biatlon_Alfa_Bank_Kubok_Sodruzhestva_Sprint_Muzhchiny',
+        'info_dict': {
+            'id': '20975a4cd84acdb55a0b5521277d0402',
+            'ext': 'mp4',
+            'title': 'Летний биатлон. Альфа-Банк Кубок Содружества. Спринт. Мужчины',
+            'thumbnail': r're:https?://[\w-]+.video.matchtv.ru/fc/[\w-]+/thumbnails/events/1101266/590556538.jpg',
+        },
+    }]
+
+    def _extract_from_webpage(self, url, webpage):
+        yield from super()._extract_from_webpage(url, webpage)
+
+        yield from traverse_obj(self._yield_json_ld(webpage, None), (
+            lambda _, v: v['@type'] == 'VideoObject', 'url',
+            {extract_attributes}, 'src', {self.url_result}))
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        feed = self._download_xml(url, video_id)
+
+        video_url = xpath_text(
+            feed, ('video_hd', 'video'), 'video url', fatal=True)
+
+        return self.url_result(video_url)