2024-11-29 19:41:31 +00:00
6 changed files with 40 additions and 324 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -982,10 +982,6 @@
    MediasiteCatalogIE,
    MediasiteNamedCatalogIE,
 )
-from .mediastream import (
-    MediaStreamIE,
-    WinSportsVideoIE,
-)
 from .mediaworksnz import MediaWorksNZVODIE
 from .medici import MediciIE
 from .megaphone import MegaphoneIE
@@ -1572,7 +1568,6 @@
 from .rule34video import Rule34VideoIE
 from .rumble import (
    RumbleEmbedIE,
-    RumbleIE,
    RumbleChannelIE,
 )
 from .rutube import (
@@ -2198,7 +2193,6 @@
    WDRElefantIE,
    WDRMobileIE,
 )
-from .webcamerapl import WebcameraplIE
 from .webcaster import (
    WebcasterIE,
    WebcasterFeedIE,
--- a/yt_dlp/extractor/la7.py
+++ b/yt_dlp/extractor/la7.py
@@ -2,6 +2,7 @@

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    float_or_none,
    HEADRequest,
    int_or_none,
@@ -12,13 +13,13 @@

 class LA7IE(InfoExtractor):
    IE_NAME = 'la7.it'
-    _VALID_URL = r'''(?x)https?://(?:
-        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/|
+    _VALID_URL = r'''(?x)(https?://)?(?:
+        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
        tg\.la7\.it/repliche-tgla7\?id=
    )(?P<id>.+)'''

    _TESTS = [{
-        # single quality video
+        # 'src' is a plain URL
        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
        'info_dict': {
@@ -28,20 +29,6 @@ class LA7IE(InfoExtractor):
            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
            'thumbnail': 're:^https?://.*',
            'upload_date': '20151002',
-            'formats': 'count:4',
-        },
-    }, {
-        # multiple quality video
-        'url': 'https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
-        'md5': 'd2370e78f75e8d1238cb3a0db9a2eda3',
-        'info_dict': {
-            'id': 'il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
-            'ext': 'mp4',
-            'title': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
-            'description': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
-            'thumbnail': 're:^https?://.*',
-            'upload_date': '20221126',
-            'formats': 'count:8',
        },
    }, {
        'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
@@ -52,7 +39,7 @@ class LA7IE(InfoExtractor):
    def _generate_mp4_url(self, quality, m3u8_formats):
        for f in m3u8_formats:
            if f['vcodec'] != 'none' and quality in f['url']:
-                http_url = f'{self._HOST}{quality}.mp4'
+                http_url = '%s%s.mp4' % (self._HOST, quality)

                urlh = self._request_webpage(
                    HEADRequest(http_url), quality,
@@ -71,13 +58,12 @@ def _generate_mp4_url(self, quality, m3u8_formats):

    def _real_extract(self, url):
        video_id = self._match_id(url)
+
+        if not url.startswith('http'):
+            url = '%s//%s' % (self.http_scheme(), url)
+
        webpage = self._download_webpage(url, video_id)
-
-        if re.search(r'(?i)(drmsupport\s*:\s*true)\s*', webpage):
-            self.report_drm(video_id)
-
-        video_path = self._search_regex(
-            r'(/content/[\w/,]+?)\.mp4(?:\.csmil)?/master\.m3u8', webpage, 'video_path')
+        video_path = self._search_regex(r'(/content/.*?).mp4', webpage, 'video_path')

        formats = self._extract_mpd_formats(
            f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd',
@@ -104,7 +90,8 @@ def _real_extract(self, url):

 class LA7PodcastEpisodeIE(InfoExtractor):
    IE_NAME = 'la7.it:pod:episode'
-    _VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'
+    _VALID_URL = r'''(?x)(https?://)?
+        (?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''

    _TESTS = [{
        'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
@@ -138,15 +125,14 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
                webpage, 'video_id', group='vid')

        media_url = self._search_regex(
-            (r'src\s*:\s*([\'"])(?P<url>\S+?mp3.+?)\1',
-             r'data-podcast\s*=\s*([\'"])(?P<url>\S+?mp3.+?)\1'),
+            (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
+             r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
            webpage, 'media_url', group='url')
+        ext = determine_ext(media_url)
        formats = [{
            'url': media_url,
-            'format_id': 'http-mp3',
-            'ext': 'mp3',
-            'acodec': 'mp3',
-            'vcodec': 'none',
+            'format_id': ext,
+            'ext': ext,
        }]

        title = self._html_search_regex(
@@ -187,7 +173,7 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
        # and title is the same as the show_title
        # add the date to the title
        if date and not date_alt and ppn and ppn.lower() == title.lower():
-            title = f'{title} del {date}'
+            title += ' del %s' % date
        return {
            'id': video_id,
            'title': title,
@@ -207,7 +193,7 @@ def _real_extract(self, url):

 class LA7PodcastIE(LA7PodcastEpisodeIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'la7.it:podcast'
-    _VALID_URL = r'https?://(?:www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
+    _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'

    _TESTS = [{
        'url': 'https://www.la7.it/propagandalive/podcast',
@@ -215,7 +201,7 @@ class LA7PodcastIE(LA7PodcastEpisodeIE):  # XXX: Do not subclass from concrete I
            'id': 'propagandalive',
            'title': "Propaganda Live",
        },
-        'playlist_count_min': 10,
+        'playlist_count': 10,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/mediastream.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import clean_html, get_element_html_by_class
-
-
-class MediaStreamIE(InfoExtractor):
-    _VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)'
-
-    _TESTS = [{
-        'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
-        'md5': '97b4f2634b8e8612cc574dfcd504df05',
-        'info_dict': {
-            'id': '6318e3f1d1d316083ae48831',
-            'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
-            'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
-            'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
-            'ext': 'mp4',
-        },
-    }]
-
-    _WEBPAGE_TESTS = [{
-        'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
-        'info_dict': {
-            'id': '5a7b1e63a8da282c34d65445',
-            'title': 're:mmtv-costarica',
-            'description': 'mmtv-costarica',
-            'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
-            'ext': 'mp4',
-            'live_status': 'is_live',
-        },
-        'params': {
-            'skip_download': 'Livestream'
-        },
-    }, {
-        'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
-        'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
-        'info_dict': {
-            'id': '63731bab8ec9b308a2c9ed28',
-            'title': 'Clases de llaves y castigos ¿Quién sabe más?',
-            'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
-            'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
-            'ext': 'mp4',
-        },
-    }, {
-        'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
-        'info_dict': {
-            'id': '63756df1c638b008a5659dec',
-            'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
-            'description': 'md5:9490c034264afd756eef7b2c3adee69e',
-            'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
-            'ext': 'mp4',
-        },
-    }, {
-        'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
-        'info_dict': {
-            'id': '637307669609130f74cd3a6e',
-            'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
-            'description': 'md5:60d71772f1e1496923539ae58aa17124',
-            'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
-            'ext': 'mp4',
-        },
-    }]
-
-    @classmethod
-    def _extract_embed_urls(cls, url, webpage):
-        for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
-            yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
-
-        yield from re.findall(
-            r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage)
-
-        for mobj in re.finditer(
-            r'''(?x)
-                <(?:div|ps-mediastream)[^>]+
-                class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
-                data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
-                (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
-                ''', webpage):
-
-            video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
-            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
-            self.raise_geo_restricted()
-
-        player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id)
-
-        formats, subtitles = [], {}
-        for video_format in player_config['src']:
-            if video_format == 'hls':
-                fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id)
-                formats.extend(fmts)
-                self._merge_subtitles(subs, target=subtitles)
-            elif video_format == 'mpd':
-                fmts, subs = self._extract_mpd_formats_and_subtitles(player_config['src'][video_format], video_id)
-                formats.extend(fmts)
-                self._merge_subtitles(subs, target=subtitles)
-            else:
-                formats.append({
-                    'url': player_config['src'][video_format],
-                })
-
-        return {
-            'id': video_id,
-            'title': self._og_search_title(webpage) or player_config.get('title'),
-            'description': self._og_search_description(webpage),
-            'formats': formats,
-            'subtitles': subtitles,
-            'is_live': player_config.get('type') == 'live',
-            'thumbnail': self._og_search_thumbnail(webpage),
-        }
-
-
-class WinSportsVideoIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)'
-
-    _TESTS = [{
-        'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
-        'info_dict': {
-            'id': '62dc8357162c4b0821fcfb3c',
-            'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco',
-            'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
-            'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
-            'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
-            'ext': 'mp4',
-        },
-    }, {
-        'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
-        'info_dict': {
-            'id': '62dcb875ef12a5526790b552',
-            'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional',
-            'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
-            'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
-            'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
-            'ext': 'mp4',
-        },
-    }]
-
-    def _real_extract(self, url):
-        display_id, video_id = self._match_valid_url(url).group('display_id', 'id')
-        webpage = self._download_webpage(url, display_id)
-
-        media_setting_json = self._search_json(
-            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
-
-        mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id']
-
-        return self.url_result(
-            f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True,
-            display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage)))
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -4,15 +4,11 @@
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
-    ExtractorError,
-    UnsupportedError,
-    clean_html,
-    get_element_by_class,
    int_or_none,
-    parse_count,
    parse_iso8601,
    traverse_obj,
    unescapeHTML,
+    ExtractorError,
 )


@@ -115,6 +111,24 @@ class RumbleEmbedIE(InfoExtractor):
    }]

    _WEBPAGE_TESTS = [
+        {
+            'note': 'Rumble embed',
+            'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+            'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+            'info_dict': {
+                'id': 'vb0ofn',
+                'ext': 'mp4',
+                'timestamp': 1612662578,
+                'uploader': 'LovingMontana',
+                'channel': 'LovingMontana',
+                'upload_date': '20210207',
+                'title': 'Winter-loving dog helps girls dig a snow fort ',
+                'channel_url': 'https://rumble.com/c/c-546523',
+                'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
+                'duration': 103,
+                'live_status': 'not_live',
+            }
+        },
        {
            'note': 'Rumble JS embed',
            'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
@@ -221,84 +235,6 @@ def _real_extract(self, url):
        }


-class RumbleIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
-    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
-    _TESTS = [{
-        'add_ie': ['RumbleEmbed'],
-        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
-        'md5': '53af34098a7f92c4e51cf0bd1c33f009',
-        'info_dict': {
-            'id': 'vb0ofn',
-            'ext': 'mp4',
-            'timestamp': 1612662578,
-            'uploader': 'LovingMontana',
-            'channel': 'LovingMontana',
-            'upload_date': '20210207',
-            'title': 'Winter-loving dog helps girls dig a snow fort ',
-            'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
-            'channel_url': 'https://rumble.com/c/c-546523',
-            'thumbnail': r're:https://.+\.jpg',
-            'duration': 103,
-            'like_count': int,
-            'view_count': int,
-            'live_status': 'not_live',
-        }
-    }, {
-        'url': 'http://www.rumble.com/vDMUM1?key=value',
-        'only_matching': True,
-    }]
-
-    _WEBPAGE_TESTS = [{
-        'url': 'https://rumble.com/videos?page=2',
-        'playlist_count': 25,
-        'info_dict': {
-            'id': 'videos?page=2',
-            'title': 'All videos',
-            'description': 'Browse videos uploaded to Rumble.com',
-            'age_limit': 0,
-        },
-    }, {
-        'url': 'https://rumble.com/live-videos',
-        'playlist_mincount': 19,
-        'info_dict': {
-            'id': 'live-videos',
-            'title': 'Live Videos',
-            'description': 'Live videos on Rumble.com',
-            'age_limit': 0,
-        },
-    }, {
-        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
-        'playlist_count': 24,
-        'info_dict': {
-            'id': 'video?q=rumble&sort=views',
-            'title': 'Search results for: rumble',
-            'age_limit': 0,
-        },
-    }]
-
-    def _real_extract(self, url):
-        page_id = self._match_id(url)
-        webpage = self._download_webpage(url, page_id)
-        url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None)
-        if not url_info:
-            raise UnsupportedError(url)
-
-        release_ts_str = self._search_regex(
-            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
-            webpage, 'release date', fatal=False, default=None)
-        view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views',
-                                            webpage, 'view count', fatal=False, default=None)
-
-        return self.url_result(
-            url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
-            view_count=parse_count(view_count_str),
-            release_timestamp=parse_iso8601(release_ts_str),
-            like_count=parse_count(get_element_by_class('rumbles-count', webpage)),
-            description=clean_html(get_element_by_class('media-description', webpage)),
-        )
-
-
 class RumbleChannelIE(InfoExtractor):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'

--- a/yt_dlp/extractor/webcamerapl.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import codecs
-
-from .common import InfoExtractor
-
-
-class WebcameraplIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<id>[\w-]+)\.webcamera\.pl'
-    _TESTS = [{
-        'url': 'https://warszawa-plac-zamkowy.webcamera.pl',
-        'info_dict': {
-            'id': 'warszawa-plac-zamkowy',
-            'ext': 'mp4',
-            'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
-            'live_status': 'is_live',
-        }
-    }, {
-        'url': 'https://gdansk-stare-miasto.webcamera.pl/',
-        'info_dict': {
-            'id': 'gdansk-stare-miasto',
-            'ext': 'mp4',
-            'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
-            'live_status': 'is_live',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        rot13_m3u8_url = self._search_regex(r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"',
-                                            webpage, 'm3u8 url', default=None)
-        if not rot13_m3u8_url:
-            self.raise_no_formats('No video/audio found at the provided url', expected=True)
-
-        m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13')
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, live=True)
-
-        return {
-            'id': video_id,
-            'title': self._html_search_regex(r'<h1\b[^>]*>([^>]+)</h1>', webpage, 'title'),
-            'formats': formats,
-            'subtitles': subtitles,
-            'is_live': True,
-        }
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -343,8 +343,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
            inner, outer = self._separate(expr, expr[0], 1)
            if expr[0] == '/':
                flags, outer = self._regex_flags(outer)
-                # Avoid https://github.com/python/cpython/issues/74534
-                inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
+                inner = re.compile(inner[1:], flags=flags)
            else:
                inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
            if not outer: