2024-11-29 19:41:31 +00:00
6 changed files with 40 additions and 324 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -982,10 +982,6 @@
    MediasiteCatalogIE,
    MediasiteNamedCatalogIE,
 )
 from .mediastream import (
    MediaStreamIE,
    WinSportsVideoIE,
 )
 from .mediaworksnz import MediaWorksNZVODIE
 from .medici import MediciIE
 from .megaphone import MegaphoneIE
@ -1572,7 +1568,6 @@
 from .rule34video import Rule34VideoIE
 from .rumble import (
    RumbleEmbedIE,
    RumbleIE,
    RumbleChannelIE,
 )
 from .rutube import (
@ -2198,7 +2193,6 @@
    WDRElefantIE,
    WDRMobileIE,
 )
 from .webcamerapl import WebcameraplIE
 from .webcaster import (
    WebcasterIE,
    WebcasterFeedIE,
--- a/yt_dlp/extractor/la7.py
+++ b/yt_dlp/extractor/la7.py
@ -2,6 +2,7 @@
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    float_or_none,
    HEADRequest,
    int_or_none,
@ -12,13 +13,13 @@
 class LA7IE(InfoExtractor):
    IE_NAME = 'la7.it'
-    _VALID_URL = r'''(?x)https?://(?:
+    _VALID_URL = r'''(?x)(https?://)?(?:
-        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/|
+        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
        tg\.la7\.it/repliche-tgla7\?id=
    )(?P<id>.+)'''
    _TESTS = [{
-        # single quality video
+        # 'src' is a plain URL
        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
        'info_dict': {
@ -28,20 +29,6 @@ class LA7IE(InfoExtractor):
            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
            'thumbnail': 're:^https?://.*',
            'upload_date': '20151002',
            'formats': 'count:4',
        },
    }, {
        # multiple quality video
        'url': 'https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
        'md5': 'd2370e78f75e8d1238cb3a0db9a2eda3',
        'info_dict': {
            'id': 'il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
            'ext': 'mp4',
            'title': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
            'description': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
            'thumbnail': 're:^https?://.*',
            'upload_date': '20221126',
            'formats': 'count:8',
        },
    }, {
        'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
@ -52,7 +39,7 @@ class LA7IE(InfoExtractor):
    def _generate_mp4_url(self, quality, m3u8_formats):
        for f in m3u8_formats:
            if f['vcodec'] != 'none' and quality in f['url']:
-                http_url = f'{self._HOST}{quality}.mp4'
+                http_url = '%s%s.mp4' % (self._HOST, quality)
                urlh = self._request_webpage(
                    HEADRequest(http_url), quality,
@ -71,13 +58,12 @@ def _generate_mp4_url(self, quality, m3u8_formats):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        if not url.startswith('http'):
            url = '%s//%s' % (self.http_scheme(), url)
        webpage = self._download_webpage(url, video_id)
-
+        video_path = self._search_regex(r'(/content/.*?).mp4', webpage, 'video_path')
        if re.search(r'(?i)(drmsupport\s*:\s*true)\s*', webpage):
            self.report_drm(video_id)
        video_path = self._search_regex(
            r'(/content/[\w/,]+?)\.mp4(?:\.csmil)?/master\.m3u8', webpage, 'video_path')
        formats = self._extract_mpd_formats(
            f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd',
@ -104,7 +90,8 @@ def _real_extract(self, url):
 class LA7PodcastEpisodeIE(InfoExtractor):
    IE_NAME = 'la7.it:pod:episode'
-    _VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'
+    _VALID_URL = r'''(?x)(https?://)?
        (?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
@ -138,15 +125,14 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
                webpage, 'video_id', group='vid')
        media_url = self._search_regex(
-            (r'src\s*:\s*([\'"])(?P<url>\S+?mp3.+?)\1',
+            (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
-             r'data-podcast\s*=\s*([\'"])(?P<url>\S+?mp3.+?)\1'),
+             r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
            webpage, 'media_url', group='url')
        ext = determine_ext(media_url)
        formats = [{
            'url': media_url,
-            'format_id': 'http-mp3',
+            'format_id': ext,
-            'ext': 'mp3',
+            'ext': ext,
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        title = self._html_search_regex(
@ -187,7 +173,7 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
        # and title is the same as the show_title
        # add the date to the title
        if date and not date_alt and ppn and ppn.lower() == title.lower():
-            title = f'{title} del {date}'
+            title += ' del %s' % date
        return {
            'id': video_id,
            'title': title,
@ -207,7 +193,7 @@ def _real_extract(self, url):
 class LA7PodcastIE(LA7PodcastEpisodeIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'la7.it:podcast'
-    _VALID_URL = r'https?://(?:www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
+    _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://www.la7.it/propagandalive/podcast',
@ -215,7 +201,7 @@ class LA7PodcastIE(LA7PodcastEpisodeIE):  # XXX: Do not subclass from concrete I
            'id': 'propagandalive',
            'title': "Propaganda Live",
        },
-        'playlist_count_min': 10,
+        'playlist_count': 10,
    }]
    def _real_extract(self, url):
--- a/yt_dlp/extractor/mediastream.py
+++ b/yt_dlp/extractor/mediastream.py
@ -1,155 +0,0 @@
 import re
 from .common import InfoExtractor
 from ..utils import clean_html, get_element_html_by_class
 class MediaStreamIE(InfoExtractor):
    """Extractor for players hosted on mdstrm.com.

    Handles direct ``/embed/<id>`` and ``/live-stream/<id>`` URLs and also
    locates mdstrm.com players embedded in third-party pages.
    """

    # The dot is escaped so that only the literal host "mdstrm.com" matches.
    _VALID_URL = r'https?://mdstrm\.com/(?:embed|live-stream)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
        'md5': '97b4f2634b8e8612cc574dfcd504df05',
        'info_dict': {
            'id': '6318e3f1d1d316083ae48831',
            'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
            'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
            'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
            'ext': 'mp4',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
        'info_dict': {
            'id': '5a7b1e63a8da282c34d65445',
            'title': 're:mmtv-costarica',
            'description': 'mmtv-costarica',
            'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream'
        },
    }, {
        'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
        'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
        'info_dict': {
            'id': '63731bab8ec9b308a2c9ed28',
            'title': 'Clases de llaves y castigos ¿Quién sabe más?',
            'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
            'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
        'info_dict': {
            'id': '63756df1c638b008a5659dec',
            'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
            'description': 'md5:9490c034264afd756eef7b2c3adee69e',
            'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
        'info_dict': {
            'id': '637307669609130f74cd3a6e',
            'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
            'description': 'md5:60d71772f1e1496923539ae58aa17124',
            'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
            'ext': 'mp4',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield mdstrm.com player URLs referenced by ``webpage``.

        Three embed styles are recognised:

        * an inline script calling ``playerMdStream.mdstreamVideo('<id>')``;
        * an ``<iframe>`` whose ``src`` points at mdstrm.com;
        * a ``<div>``/``<ps-mediastream>`` element carrying the
          ``MediaStreamVideoPlayer`` class and a ``data-video-id`` attribute.

        ``url`` is accepted for interface compatibility and is not used here.
        """
        for mobj in re.finditer(
                r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
            yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
        # "[^>]+\b" lets any number of attributes precede src; the previous
        # "[^>]" allowed exactly one character between "<iframe" and "src".
        yield from re.findall(
            r'<iframe[^>]+\bsrc\s*=\s*"(https://mdstrm\.com/[\w-]+/\w+)', webpage)
        for mobj in re.finditer(
            r'''(?x)
                <(?:div|ps-mediastream)[^>]+
                class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
                data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
                (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
                ''', webpage):
            # Only an explicit data-video-type of "live" selects the live-stream path.
            video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'

    def _real_extract(self, url):
        """Fetch the player page and build the info dict from its embedded options.

        Formats come from the ``src`` mapping of the ``window.MDSTRM.OPTIONS``
        JSON object: ``hls`` and ``mpd`` entries are expanded through the
        manifest helpers, any other entry is used as a direct URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The player page shows this Spanish notice when the content is blocked by location.
        if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
            self.raise_geo_restricted()
        player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
        formats, subtitles = [], {}
        for format_name, format_url in player_config['src'].items():
            if format_name == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(format_url, video_id)
            elif format_name == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(format_url, video_id)
            else:
                # Anything that is not a manifest is offered as a plain URL format.
                fmts, subs = [{'url': format_url}], {}
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage) or player_config.get('title'),
            'description': self._og_search_description(webpage),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': player_config.get('type') == 'live',
            'thumbnail': self._og_search_thumbnail(webpage),
        }
 class WinSportsVideoIE(InfoExtractor):
    """Extractor for winsports.co video pages, which wrap an mdstrm.com player."""

    _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
        'info_dict': {
            'id': '62dc8357162c4b0821fcfb3c',
            'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco',
            'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
            'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
            'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
        'info_dict': {
            'id': '62dcb875ef12a5526790b552',
            'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional',
            'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
            'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
            'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page to its mdstrm.com embed and delegate to MediaStreamIE.

        The mediastream id is looked up, keyed by the numeric id from the URL,
        in the JSON found after the ``drupal-settings-json`` script tag.
        """
        url_match = self._match_valid_url(url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')
        webpage = self._download_webpage(url, display_id)
        drupal_settings = self._search_json(
            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
        formatter_entry = drupal_settings['settings']['mediastream_formatter'][video_id]
        mediastream_id = formatter_entry['mediastream_id']
        embed_url = f'https://mdstrm.com/embed/{mediastream_id}'
        # The title passed along is taken from the page element with class "title-news".
        headline_html = get_element_html_by_class('title-news', webpage)
        return self.url_result(
            embed_url, MediaStreamIE, video_id,
            url_transparent=True, display_id=display_id,
            video_title=clean_html(headline_html))
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@ -4,15 +4,11 @@
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
    ExtractorError,
    UnsupportedError,
    clean_html,
    get_element_by_class,
    int_or_none,
    parse_count,
    parse_iso8601,
    traverse_obj,
    unescapeHTML,
    ExtractorError,
 )
@ -115,6 +111,24 @@ class RumbleEmbedIE(InfoExtractor):
    }]
    _WEBPAGE_TESTS = [
        {
            'note': 'Rumble embed',
            'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
            'md5': '53af34098a7f92c4e51cf0bd1c33f009',
            'info_dict': {
                'id': 'vb0ofn',
                'ext': 'mp4',
                'timestamp': 1612662578,
                'uploader': 'LovingMontana',
                'channel': 'LovingMontana',
                'upload_date': '20210207',
                'title': 'Winter-loving dog helps girls dig a snow fort ',
                'channel_url': 'https://rumble.com/c/c-546523',
                'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
                'duration': 103,
                'live_status': 'not_live',
            }
        },
        {
            'note': 'Rumble JS embed',
            'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
@ -221,84 +235,6 @@ def _real_extract(self, url):
        }
 class RumbleIE(InfoExtractor):
    """Extractor for rumble.com video pages.

    The media itself is resolved through RumbleEmbedIE; this class adds the
    metadata that is scraped from the video page.
    """

    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
    _TESTS = [{
        'add_ie': ['RumbleEmbed'],
        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
        'md5': '53af34098a7f92c4e51cf0bd1c33f009',
        'info_dict': {
            'id': 'vb0ofn',
            'ext': 'mp4',
            'timestamp': 1612662578,
            'uploader': 'LovingMontana',
            'channel': 'LovingMontana',
            'upload_date': '20210207',
            'title': 'Winter-loving dog helps girls dig a snow fort ',
            'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
            'channel_url': 'https://rumble.com/c/c-546523',
            'thumbnail': r're:https://.+\.jpg',
            'duration': 103,
            'like_count': int,
            'view_count': int,
            'live_status': 'not_live',
        }
    }, {
        'url': 'http://www.rumble.com/vDMUM1?key=value',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://rumble.com/videos?page=2',
        'playlist_count': 25,
        'info_dict': {
            'id': 'videos?page=2',
            'title': 'All videos',
            'description': 'Browse videos uploaded to Rumble.com',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/live-videos',
        'playlist_mincount': 19,
        'info_dict': {
            'id': 'live-videos',
            'title': 'Live Videos',
            'description': 'Live videos on Rumble.com',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
        'playlist_count': 24,
        'info_dict': {
            'id': 'video?q=rumble&sort=views',
            'title': 'Search results for: rumble',
            'age_limit': 0,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the first embed found on the page.

        Raises UnsupportedError when RumbleEmbedIE finds no embed in the page.
        View count, release timestamp, like count and description are scraped
        from the page markup and attached to the result.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
        embed_results = RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage)
        embed_info = next(embed_results, None)
        if not embed_info:
            raise UnsupportedError(url)

        # Both lookups are optional; a miss yields None.
        release_ts_str = self._search_regex(
            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
            webpage, 'release date', fatal=False, default=None)
        view_count_str = self._search_regex(
            r'<span class="media-heading-info">([\d,]+) Views',
            webpage, 'view count', fatal=False, default=None)
        likes_text = get_element_by_class('rumbles-count', webpage)
        description_html = get_element_by_class('media-description', webpage)

        return self.url_result(
            embed_info['url'], ie_key=embed_info['ie_key'], url_transparent=True,
            view_count=parse_count(view_count_str),
            release_timestamp=parse_iso8601(release_ts_str),
            like_count=parse_count(likes_text),
            description=clean_html(description_html),
        )
 class RumbleChannelIE(InfoExtractor):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
--- a/yt_dlp/extractor/webcamerapl.py
+++ b/yt_dlp/extractor/webcamerapl.py
@ -1,44 +0,0 @@
 import codecs
 from .common import InfoExtractor
 class WebcameraplIE(InfoExtractor):
    """Extractor for live camera pages on ``<name>.webcamera.pl``."""

    _VALID_URL = r'https?://(?P<id>[\w-]+)\.webcamera\.pl'
    _TESTS = [{
        'url': 'https://warszawa-plac-zamkowy.webcamera.pl',
        'info_dict': {
            'id': 'warszawa-plac-zamkowy',
            'ext': 'mp4',
            'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        }
    }, {
        'url': 'https://gdansk-stare-miasto.webcamera.pl/',
        'info_dict': {
            'id': 'gdansk-stare-miasto',
            'ext': 'mp4',
            'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        }
    }]

    def _real_extract(self, url):
        """Decode the ROT13-obfuscated playlist URL from the page and extract the live formats.

        When the page carries no playlist URL, ``raise_no_formats`` is invoked
        and, should it return instead of raising, an info dict with empty
        formats and subtitles is returned.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The data-src value is ROT13-encoded: "uggc" decodes to "http" and ".z3h8" to ".m3u8".
        rot13_m3u8_url = self._search_regex(
            r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"', webpage, 'm3u8 url', default=None)
        formats, subtitles = [], {}
        if rot13_m3u8_url:
            m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, live=True)
        else:
            # NOTE(review): raise_no_formats(expected=True) is understood to only warn
            # (not raise) under options such as ignore_no_formats_error - confirm.
            # Decoding is skipped on this path so that None is never passed to codecs.decode.
            self.raise_no_formats('No video/audio found at the provided url', expected=True)
        return {
            'id': video_id,
            'title': self._html_search_regex(r'<h1\b[^>]*>([^>]+)</h1>', webpage, 'title'),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@ -343,8 +343,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
            inner, outer = self._separate(expr, expr[0], 1)
            if expr[0] == '/':
                flags, outer = self._regex_flags(outer)
-                # Avoid https://github.com/python/cpython/issues/74534
+                inner = re.compile(inner[1:], flags=flags)
                inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
            else:
                inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
            if not outer: