From bc4ab17b38f01000d99c5c2bedec89721fee65ec Mon Sep 17 00:00:00 2001 From: barsnick Date: Fri, 22 Dec 2023 02:32:29 +0100 Subject: [PATCH 01/17] [cleanup] Fix spelling of `IE_NAME` (#8810) Authored by: barsnick --- yt_dlp/extractor/iheart.py | 2 +- yt_dlp/extractor/kinja.py | 2 +- yt_dlp/extractor/nba.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py index 2c6a5b6a1d..fb6f51e2ca 100644 --- a/yt_dlp/extractor/iheart.py +++ b/yt_dlp/extractor/iheart.py @@ -23,7 +23,7 @@ def _extract_episode(self, episode): class IHeartRadioIE(IHeartRadioBaseIE): - IENAME = 'iheartradio' + IE_NAME = 'iheartradio' _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P[^/?&#]+)-|iheartradio:)(?P\d+)' _TEST = { 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true', diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py index a225d0a0d2..f4e5c4c479 100644 --- a/yt_dlp/extractor/kinja.py +++ b/yt_dlp/extractor/kinja.py @@ -12,7 +12,7 @@ class KinjaEmbedIE(InfoExtractor): - IENAME = 'kinja:embed' + IE_NAME = 'kinja:embed' _DOMAIN_REGEX = r'''(?:[^.]+\.)? (?: avclub| diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index d8fc82488d..81d11e3a50 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -97,7 +97,7 @@ def _extract_video(self, filter_key, filter_value): class NBAWatchEmbedIE(NBAWatchBaseIE): - IENAME = 'nba:watch:embed' + IE_NAME = 'nba:watch:embed' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P\d+)' _TESTS = [{ 'url': 'http://watch.nba.com/embed?id=659395', @@ -339,7 +339,7 @@ def _real_extract(self, url): class NBAEmbedIE(NBABaseIE): - IENAME = 'nba:embed' + IE_NAME = 'nba:embed' _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P[^?#&]+)' _TESTS = [{ 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=', @@ -361,7 +361,7 @@ def _real_extract(self, url): class NBAIE(NBABaseIE): - IENAME = 'nba' + IE_NAME = 'nba' _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _TESTS = [{ 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', @@ -388,7 +388,7 @@ def _extract_url_results(self, team, content_id): class NBAChannelIE(NBABaseIE): - IENAME = 'nba:channel' + IE_NAME = 'nba:channel' _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _TESTS = [{ 'url': 'https://www.nba.com/blazers/video/channel/summer_league', From 0d531c35eca4c2eb36e160530a7a333edbc727cc Mon Sep 17 00:00:00 2001 From: Nicolas Dato <67328748+nicodato@users.noreply.github.com> Date: Fri, 22 Dec 2023 18:52:07 -0300 Subject: [PATCH 02/17] [ie/RudoVideo] Add extractor (#8664) Authored by: nicodato --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/rudovideo.py | 135 ++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 yt_dlp/extractor/rudovideo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d5f030c6b0..5c34bb7f4b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1647,6 +1647,7 @@ RumbleIE, RumbleChannelIE, ) +from .rudovideo import RudoVideoIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/yt_dlp/extractor/rudovideo.py b/yt_dlp/extractor/rudovideo.py new file mode 100644 index 0000000000..1b8595593d --- /dev/null +++ b/yt_dlp/extractor/rudovideo.py @@ -0,0 +1,135 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + js_to_json, + traverse_obj, + update_url_query, + url_or_none, +) + + +class RudoVideoIE(InfoExtractor): + _VALID_URL = r'https?://rudo\.video/(?Pvod|podcast|live)/(?P[^/?&#]+)' + _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)'] + _TESTS = [{ + 'url': 'https://rudo.video/podcast/cz2wrUy8l0o', + 'md5': '28ed82b477708dc5e12e072da2449221', + 'info_dict': { + 'id': 'cz2wrUy8l0o', + 'title': 'Diego Cabot', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/podcast/bQkt07', + 'md5': '36b22a9863de0f47f00fc7532a32a898', + 'info_dict': { + 'id': 'bQkt07', + 'title': 'Tubular Bells', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/podcast/b42ZUznHX0', + 'md5': 'b91c70d832938871367f8ad10c895821', + 'info_dict': { + 'id': 'b42ZUznHX0', + 'title': 'Columna Ruperto Concha', + 'ext': 'mp3', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/vod/bN5AaJ', + 'md5': '01324a329227e2591530ecb4f555c881', + 'info_dict': { + 'id': 'bN5AaJ', + 'title': 'Ucrania 19.03', + 'creator': 'La Tercera', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/bbtv', + 'info_dict': { + 'id': 'bbtv', + 'ext': 'mp4', + 'creator': 'BioBioTV', + 'live_status': 'is_live', + 'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/c13', + 'info_dict': { + 'id': 'c13', + 'title': 'CANAL13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }, { + 'url': 'https://rudo.video/live/t13-13cl', + 'info_dict': { + 'id': 't13-13cl', + 'title': 'T13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }] + + def _real_extract(self, url): + video_id, type_ = self._match_valid_url(url).group('id', 'type') + is_live = type_ == 'live' + + webpage = self._download_webpage(url, video_id) + if 'Streaming is not available in your area' in webpage: + self.raise_geo_restricted() + + media_url = ( + self._search_regex( + r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None) + # Source URL must be used only if streamURL is unavailable + or self._search_regex( + r']+src=[\'"]([^\'"]+)', webpage, 'source url', default=None)) + if not media_url: + youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)', + webpage, 'youtube url', default=None) + if youtube_url: + return self.url_result(youtube_url, 'Youtube') + raise ExtractorError('Unable to extract stream url') + + token_array = self._search_json( + r'