Merge branch 'master' into ndtv

This commit is contained in:
subrat-lima 2024-08-22 13:53:03 +05:30
commit 92a176955f
5 changed files with 91 additions and 20 deletions

View file

@ -101,9 +101,10 @@ def _real_initialize(self):
self._HEADERS['Authorization'] = f'Bearer {token}' self._HEADERS['Authorization'] = f'Bearer {token}'
def _real_extract(self, url): def _real_extract(self, url):
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug') webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_] video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj( event_data = traverse_obj(
self._search_nextjs_data(webpage, video_id, default={}), self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', { ('props', 'pageProps', 'eventCMSData', {

View file

@ -3,7 +3,12 @@
class EurosportIE(InfoExtractor): class EurosportIE(InfoExtractor):
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)' _VALID_URL = r'''(?x)
https?://(?:
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
eurosport\.tvn24\.pl
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml', 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
'info_dict': { 'info_dict': {
@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
'duration': 105.0, 'duration': 105.0,
'upload_date': '20230518', 'upload_date': '20230518',
}, },
}, {
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
'only_matching': True,
}, {
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
'only_matching': True,
}] }]
_TOKEN = None _TOKEN = None
@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 .. # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
# but this method require to get sha256 hash # but this method require to get sha256 hash
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
_GEO_BYPASS = False
def _real_initialize(self): def _real_initialize(self):
if EurosportIE._TOKEN is None: if EurosportIE._TOKEN is None:
@ -98,13 +140,13 @@ def _real_extract(self, url):
for stream_type in json_data['attributes']['streaming']: for stream_type in json_data['attributes']['streaming']:
if stream_type == 'hls': if stream_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4') traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
elif stream_type == 'dash': elif stream_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles( fmts, subs = self._extract_mpd_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id) traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
elif stream_type == 'mss': elif stream_type == 'mss':
fmts, subs = self._extract_ism_formats_and_subtitles( fmts, subs = self._extract_ism_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id) traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)

View file

@ -7,6 +7,7 @@
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
join_nonempty,
time_seconds, time_seconds,
try_call, try_call,
unified_timestamp, unified_timestamp,
@ -167,7 +168,7 @@ def _extract_performers(self, prog):
class RadikoIE(RadikoBaseIE): class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
_TESTS = [{ _TESTS = [{
# QRR (文化放送) station provides <desc> # QRR (文化放送) station provides <desc>
@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
station, video_id = self._match_valid_url(url).groups() station, timestring = self._match_valid_url(url).group('station', 'timestring')
vid_int = unified_timestamp(video_id, False) video_id = join_nonempty(station, timestring)
vid_int = unified_timestamp(timestring, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
auth_token, area_id = self._auth_client() auth_token, area_id = self._auth_client()
@ -207,7 +209,7 @@ def _real_extract(self, url):
'ft': radio_begin, 'ft': radio_begin,
'end_at': radio_end, 'end_at': radio_end,
'to': radio_end, 'to': radio_end,
'seek': video_id, 'seek': timestring,
}, },
), ),
} }

View file

@ -8,7 +8,7 @@
class TVN24IE(InfoExtractor): class TVN24IE(InfoExtractor):
_WORKING = False _WORKING = False
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c', 'md5': 'fbdec753d7bc29d96036808275f2130c',

View file

@ -1764,7 +1764,7 @@ class TwitterSpacesIE(TwitterBaseIE):
'release_timestamp': 1659904215, 'release_timestamp': 1659904215,
'release_date': '20220807', 'release_date': '20220807',
}, },
'params': {'skip_download': 'm3u8'}, 'skip': 'No longer available',
}, { }, {
# post_live/TimedOut but downloadable # post_live/TimedOut but downloadable
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl', 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@ -1780,6 +1780,8 @@ class TwitterSpacesIE(TwitterBaseIE):
'upload_date': '20230413', 'upload_date': '20230413',
'release_timestamp': 1681839000, 'release_timestamp': 1681839000,
'release_date': '20230418', 'release_date': '20230418',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -1790,11 +1792,31 @@ class TwitterSpacesIE(TwitterBaseIE):
'ext': 'm4a', 'ext': 'm4a',
'title': '', 'title': '',
'description': 'Twitter Space participated by nobody yet', 'description': 'Twitter Space participated by nobody yet',
'uploader': '息根とめる🔪Twitchで復活', 'uploader': '息根とめる',
'uploader_id': 'tomeru_ikinone', 'uploader_id': 'tomeru_ikinone',
'live_status': 'was_live', 'live_status': 'was_live',
'timestamp': 1685617198, 'timestamp': 1685617198,
'upload_date': '20230601', 'upload_date': '20230601',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
# Video Space
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
'info_dict': {
'id': '1DXGydznBYWKM',
'ext': 'mp4',
'title': 'America and Israels “special relationship”',
'description': 'Twitter Space participated by nobody yet',
'uploader': 'Candace Owens',
'uploader_id': 'RealCandaceO',
'live_status': 'was_live',
'timestamp': 1723931351,
'upload_date': '20240817',
'release_timestamp': 1723932000,
'release_date': '20240817',
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
@ -1854,13 +1876,17 @@ def _real_extract(self, url):
source = traverse_obj( source = traverse_obj(
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']), self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False) ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader is_audio_space = source and 'audio-space' in source
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live, formats = self._extract_m3u8_formats(
headers=headers, fatal=False) if source else [] source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
for fmt in formats: # XXX: Some audio-only Spaces need ffmpeg as downloader
fmt.update({'vcodec': 'none', 'acodec': 'aac'}) entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
if not is_live: live=is_live, headers=headers, fatal=False) if source else []
fmt['container'] = 'm4a_dash' if is_audio_space:
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
participants = ', '.join(traverse_obj( participants = ', '.join(traverse_obj(
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet' space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'