Compare commits

..

No commits in common. "3d79ebc8b7e2b1fe3be8cbd0957b00ef29f8647a" and "28b8f57b4b2a2e1bd1fbe68ae1ab2c44fdd51992" have entirely different histories.

6 changed files with 40 additions and 324 deletions

View file

@ -982,10 +982,6 @@
MediasiteCatalogIE, MediasiteCatalogIE,
MediasiteNamedCatalogIE, MediasiteNamedCatalogIE,
) )
from .mediastream import (
MediaStreamIE,
WinSportsVideoIE,
)
from .mediaworksnz import MediaWorksNZVODIE from .mediaworksnz import MediaWorksNZVODIE
from .medici import MediciIE from .medici import MediciIE
from .megaphone import MegaphoneIE from .megaphone import MegaphoneIE
@ -1572,7 +1568,6 @@
from .rule34video import Rule34VideoIE from .rule34video import Rule34VideoIE
from .rumble import ( from .rumble import (
RumbleEmbedIE, RumbleEmbedIE,
RumbleIE,
RumbleChannelIE, RumbleChannelIE,
) )
from .rutube import ( from .rutube import (
@ -2198,7 +2193,6 @@
WDRElefantIE, WDRElefantIE,
WDRMobileIE, WDRMobileIE,
) )
from .webcamerapl import WebcameraplIE
from .webcaster import ( from .webcaster import (
WebcasterIE, WebcasterIE,
WebcasterFeedIE, WebcasterFeedIE,

View file

@ -2,6 +2,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
int_or_none, int_or_none,
@ -12,13 +13,13 @@
class LA7IE(InfoExtractor): class LA7IE(InfoExtractor):
IE_NAME = 'la7.it' IE_NAME = 'la7.it'
_VALID_URL = r'''(?x)https?://(?: _VALID_URL = r'''(?x)(https?://)?(?:
(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/| (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
tg\.la7\.it/repliche-tgla7\?id= tg\.la7\.it/repliche-tgla7\?id=
)(?P<id>.+)''' )(?P<id>.+)'''
_TESTS = [{ _TESTS = [{
# single quality video # 'src' is a plain URL
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': { 'info_dict': {
@ -28,20 +29,6 @@ class LA7IE(InfoExtractor):
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
'thumbnail': 're:^https?://.*', 'thumbnail': 're:^https?://.*',
'upload_date': '20151002', 'upload_date': '20151002',
'formats': 'count:4',
},
}, {
# multiple quality video
'url': 'https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
'md5': 'd2370e78f75e8d1238cb3a0db9a2eda3',
'info_dict': {
'id': 'il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
'ext': 'mp4',
'title': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
'description': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
'thumbnail': 're:^https?://.*',
'upload_date': '20221126',
'formats': 'count:8',
}, },
}, { }, {
'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
@ -52,7 +39,7 @@ class LA7IE(InfoExtractor):
def _generate_mp4_url(self, quality, m3u8_formats): def _generate_mp4_url(self, quality, m3u8_formats):
for f in m3u8_formats: for f in m3u8_formats:
if f['vcodec'] != 'none' and quality in f['url']: if f['vcodec'] != 'none' and quality in f['url']:
http_url = f'{self._HOST}{quality}.mp4' http_url = '%s%s.mp4' % (self._HOST, quality)
urlh = self._request_webpage( urlh = self._request_webpage(
HEADRequest(http_url), quality, HEADRequest(http_url), quality,
@ -71,13 +58,12 @@ def _generate_mp4_url(self, quality, m3u8_formats):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not url.startswith('http'):
url = '%s//%s' % (self.http_scheme(), url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_path = self._search_regex(r'(/content/.*?).mp4', webpage, 'video_path')
if re.search(r'(?i)(drmsupport\s*:\s*true)\s*', webpage):
self.report_drm(video_id)
video_path = self._search_regex(
r'(/content/[\w/,]+?)\.mp4(?:\.csmil)?/master\.m3u8', webpage, 'video_path')
formats = self._extract_mpd_formats( formats = self._extract_mpd_formats(
f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd', f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd',
@ -104,7 +90,8 @@ def _real_extract(self, url):
class LA7PodcastEpisodeIE(InfoExtractor): class LA7PodcastEpisodeIE(InfoExtractor):
IE_NAME = 'la7.it:pod:episode' IE_NAME = 'la7.it:pod:episode'
_VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)' _VALID_URL = r'''(?x)(https?://)?
(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497', 'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
@ -138,15 +125,14 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
webpage, 'video_id', group='vid') webpage, 'video_id', group='vid')
media_url = self._search_regex( media_url = self._search_regex(
(r'src\s*:\s*([\'"])(?P<url>\S+?mp3.+?)\1', (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
r'data-podcast\s*=\s*([\'"])(?P<url>\S+?mp3.+?)\1'), r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
webpage, 'media_url', group='url') webpage, 'media_url', group='url')
ext = determine_ext(media_url)
formats = [{ formats = [{
'url': media_url, 'url': media_url,
'format_id': 'http-mp3', 'format_id': ext,
'ext': 'mp3', 'ext': ext,
'acodec': 'mp3',
'vcodec': 'none',
}] }]
title = self._html_search_regex( title = self._html_search_regex(
@ -187,7 +173,7 @@ def _extract_info(self, webpage, video_id=None, ppn=None):
# and title is the same as the show_title # and title is the same as the show_title
# add the date to the title # add the date to the title
if date and not date_alt and ppn and ppn.lower() == title.lower(): if date and not date_alt and ppn and ppn.lower() == title.lower():
title = f'{title} del {date}' title += ' del %s' % date
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -207,7 +193,7 @@ def _real_extract(self, url):
class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete IE class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'la7.it:podcast' IE_NAME = 'la7.it:podcast'
_VALID_URL = r'https?://(?:www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])' _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
_TESTS = [{ _TESTS = [{
'url': 'https://www.la7.it/propagandalive/podcast', 'url': 'https://www.la7.it/propagandalive/podcast',
@ -215,7 +201,7 @@ class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete I
'id': 'propagandalive', 'id': 'propagandalive',
'title': "Propaganda Live", 'title': "Propaganda Live",
}, },
'playlist_count_min': 10, 'playlist_count': 10,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -1,155 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import clean_html, get_element_html_by_class
class MediaStreamIE(InfoExtractor):
    """Extractor for videos and live streams served from mdstrm.com.

    Handles direct ``/embed/<id>`` and ``/live-stream/<id>`` URLs and also
    detects MediaStream players embedded in third-party pages through
    ``_extract_embed_urls``.
    """
    # Dots in the host name are escaped so that only the literal domain matches.
    _VALID_URL = r'https?://mdstrm\.com/(?:embed|live-stream)/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
        'md5': '97b4f2634b8e8612cc574dfcd504df05',
        'info_dict': {
            'id': '6318e3f1d1d316083ae48831',
            'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
            'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
            'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
            'ext': 'mp4',
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
        'info_dict': {
            'id': '5a7b1e63a8da282c34d65445',
            'title': 're:mmtv-costarica',
            'description': 'mmtv-costarica',
            'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream'
        },
    }, {
        'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
        'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
        'info_dict': {
            'id': '63731bab8ec9b308a2c9ed28',
            'title': 'Clases de llaves y castigos ¿Quién sabe más?',
            'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
            'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
        'info_dict': {
            'id': '63756df1c638b008a5659dec',
            'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
            'description': 'md5:9490c034264afd756eef7b2c3adee69e',
            'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
        'info_dict': {
            'id': '637307669609130f74cd3a6e',
            'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
            'description': 'md5:60d71772f1e1496923539ae58aa17124',
            'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
            'ext': 'mp4',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield mdstrm.com URLs for every MediaStream player found in *webpage*.

        Three embedding styles are recognised: an inline
        ``playerMdStream.mdstreamVideo('<id>')`` script call, a plain iframe
        pointing at mdstrm.com, and a ``div``/``ps-mediastream`` element
        carrying a ``MediaStreamVideoPlayer`` class with a ``data-video-id``.
        """
        # Inline script call; the literal dot of the JS member access is escaped.
        for mobj in re.finditer(
                r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
            yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'

        # ``[^>]+`` (instead of a single ``[^>]``) lets the iframe carry other
        # attributes before ``src``.
        yield from re.findall(
            r'<iframe[^>]+\bsrc\s*=\s*"(https://mdstrm\.com/[\w-]+/\w+)', webpage)

        for mobj in re.finditer(
                r'''(?x)
                    <(?:div|ps-mediastream)[^>]+
                    class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
                    data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
                    (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
                ''', webpage):
            # Only an explicit "live" type maps to the live-stream endpoint.
            video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'

    def _real_extract(self, url):
        """Download the player page and build the info dict from its config."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player page shows this (Spanish) message when the content is
        # not available from the viewer's location.
        if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
            self.raise_geo_restricted()

        player_config = self._search_json(
            r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

        formats, subtitles = [], {}
        # 'src' maps a format name to its URL; 'hls' and 'mpd' are manifests,
        # anything else is used as a direct media URL.
        for format_name, format_url in player_config['src'].items():
            if format_name == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(format_url, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif format_name == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(format_url, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': format_url,
                })

        return {
            'id': video_id,
            'title': self._og_search_title(webpage) or player_config.get('title'),
            'description': self._og_search_description(webpage),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': player_config.get('type') == 'live',
            'thumbnail': self._og_search_thumbnail(webpage),
        }
class WinSportsVideoIE(InfoExtractor):
    """Resolve winsports.co video pages to their underlying mdstrm.com embed."""
    _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
        'info_dict': {
            'id': '62dc8357162c4b0821fcfb3c',
            'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco',
            'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
            'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
            'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
        'info_dict': {
            'id': '62dcb875ef12a5526790b552',
            'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional',
            'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
            'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
            'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Look up the MediaStream id in the page's Drupal settings and delegate."""
        url_match = self._match_valid_url(url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        webpage = self._download_webpage(url, display_id)

        # The page embeds its Drupal settings as JSON inside a <script> tag.
        drupal_settings = self._search_json(
            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">',
            webpage, 'drupal-setting-json', display_id)
        formatter_entry = drupal_settings['settings']['mediastream_formatter'][video_id]
        embed_url = f'https://mdstrm.com/embed/{formatter_entry["mediastream_id"]}'

        # The headline element supplies the title passed along to the embed extractor.
        headline = clean_html(get_element_html_by_class('title-news', webpage))

        return self.url_result(
            embed_url, MediaStreamIE, video_id, url_transparent=True,
            display_id=display_id, video_title=headline)

View file

@ -4,15 +4,11 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
UnsupportedError,
clean_html,
get_element_by_class,
int_or_none, int_or_none,
parse_count,
parse_iso8601, parse_iso8601,
traverse_obj, traverse_obj,
unescapeHTML, unescapeHTML,
ExtractorError,
) )
@ -115,6 +111,24 @@ class RumbleEmbedIE(InfoExtractor):
}] }]
_WEBPAGE_TESTS = [ _WEBPAGE_TESTS = [
{
'note': 'Rumble embed',
'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
'md5': '53af34098a7f92c4e51cf0bd1c33f009',
'info_dict': {
'id': 'vb0ofn',
'ext': 'mp4',
'timestamp': 1612662578,
'uploader': 'LovingMontana',
'channel': 'LovingMontana',
'upload_date': '20210207',
'title': 'Winter-loving dog helps girls dig a snow fort ',
'channel_url': 'https://rumble.com/c/c-546523',
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
'duration': 103,
'live_status': 'not_live',
}
},
{ {
'note': 'Rumble JS embed', 'note': 'Rumble JS embed',
'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
@ -221,84 +235,6 @@ def _real_extract(self, url):
} }
class RumbleIE(InfoExtractor):
    """Extractor for rumble.com video pages.

    The media itself is resolved through the player embedded in the page
    (found with ``RumbleEmbedIE``); this class adds the metadata that is only
    present on the page: view/like counts, release time and description.
    """
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']

    _TESTS = [{
        'add_ie': ['RumbleEmbed'],
        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
        'md5': '53af34098a7f92c4e51cf0bd1c33f009',
        'info_dict': {
            'id': 'vb0ofn',
            'ext': 'mp4',
            'timestamp': 1612662578,
            'uploader': 'LovingMontana',
            'channel': 'LovingMontana',
            'upload_date': '20210207',
            'title': 'Winter-loving dog helps girls dig a snow fort ',
            'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
            'channel_url': 'https://rumble.com/c/c-546523',
            'thumbnail': r're:https://.+\.jpg',
            'duration': 103,
            'like_count': int,
            'view_count': int,
            'live_status': 'not_live',
        }
    }, {
        'url': 'http://www.rumble.com/vDMUM1?key=value',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://rumble.com/videos?page=2',
        'playlist_count': 25,
        'info_dict': {
            'id': 'videos?page=2',
            'title': 'All videos',
            'description': 'Browse videos uploaded to Rumble.com',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/live-videos',
        'playlist_mincount': 19,
        'info_dict': {
            'id': 'live-videos',
            'title': 'Live Videos',
            'description': 'Live videos on Rumble.com',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
        'playlist_count': 24,
        'info_dict': {
            'id': 'video?q=rumble&sort=views',
            'title': 'Search results for: rumble',
            'age_limit': 0,
        },
    }]

    def _real_extract(self, url):
        """Find the embedded player on the page and attach page-level metadata."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # Only the first embed reported for the page is used.
        embeds = RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage)
        url_info = next(embeds, None)
        if not url_info:
            raise UnsupportedError(url)

        release_raw = self._search_regex(
            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
            webpage, 'release date', fatal=False, default=None)
        views_raw = self._search_regex(
            r'<span class="media-heading-info">([\d,]+) Views',
            webpage, 'view count', fatal=False, default=None)

        # Metadata scraped from the page itself, merged into the embed result.
        page_metadata = {
            'view_count': parse_count(views_raw),
            'release_timestamp': parse_iso8601(release_raw),
            'like_count': parse_count(get_element_by_class('rumbles-count', webpage)),
            'description': clean_html(get_element_by_class('media-description', webpage)),
        }

        return self.url_result(
            url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
            **page_metadata)
class RumbleChannelIE(InfoExtractor): class RumbleChannelIE(InfoExtractor):
_VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))' _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'

View file

@ -1,44 +0,0 @@
import codecs
from .common import InfoExtractor
class WebcameraplIE(InfoExtractor):
    """Extractor for live webcam streams hosted on ``<name>.webcamera.pl``.

    The page stores the HLS manifest URL ROT13-encoded in a ``data-src``
    attribute; the subdomain is used as the video id.
    """
    _VALID_URL = r'https?://(?P<id>[\w-]+)\.webcamera\.pl'

    _TESTS = [{
        'url': 'https://warszawa-plac-zamkowy.webcamera.pl',
        'info_dict': {
            'id': 'warszawa-plac-zamkowy',
            'ext': 'mp4',
            'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        }
    }, {
        'url': 'https://gdansk-stare-miasto.webcamera.pl/',
        'info_dict': {
            'id': 'gdansk-stare-miasto',
            'ext': 'mp4',
            'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        }
    }]

    def _real_extract(self, url):
        """Decode the obfuscated manifest URL and return the live stream info."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # "uggc" / ".z3h8" are the ROT13 forms of "http" / ".m3u8".
        rot13_m3u8_url = self._search_regex(
            r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"',
            webpage, 'm3u8 url', default=None)

        formats, subtitles = [], {}
        if rot13_m3u8_url:
            m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, live=True)
        else:
            # NOTE(review): with expected=True this helper presumably only
            # warns (instead of raising) when the user chose to ignore
            # "no formats" errors - confirm against InfoExtractor. Decoding is
            # skipped here so that case cannot hit codecs.decode(None, ...)
            # and fail with a TypeError.
            self.raise_no_formats('No video/audio found at the provided url', expected=True)

        return {
            'id': video_id,
            'title': self._html_search_regex(r'<h1\b[^>]*>([^>]+)</h1>', webpage, 'title'),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }

View file

@ -343,8 +343,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
inner, outer = self._separate(expr, expr[0], 1) inner, outer = self._separate(expr, expr[0], 1)
if expr[0] == '/': if expr[0] == '/':
flags, outer = self._regex_flags(outer) flags, outer = self._regex_flags(outer)
# Avoid https://github.com/python/cpython/issues/74534 inner = re.compile(inner[1:], flags=flags)
inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
else: else:
inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True)) inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
if not outer: if not outer: