mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 14:37:21 +00:00
support embedded playlist; write sane regex; rework _VALID_URL
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <sepro@sepr0.com>
This commit is contained in:
parent
c4172f6de6
commit
cddd001021
|
@ -16,6 +16,7 @@
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
@ -101,10 +102,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
_VALID_URL = r'''(?ix)
|
_VALID_URL = r'''(?ix)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||||
(?:www\.)?lequipe\.fr/video
|
(?:www\.)?lequipe\.fr
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
video/|
|
||||||
|
swf(?:/(?!video)|/video/)|
|
||||||
|
player(?:(?:/\w+)?\.html)?(?:\?video=|/)
|
||||||
)
|
)
|
||||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
(?P<id>[^/?_&#]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||||
|
@ -220,6 +226,31 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x93blhi',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'OnAir - 01/08/24',
|
||||||
|
'description': '',
|
||||||
|
'duration': 217,
|
||||||
|
'timestamp': 1722505658,
|
||||||
|
'upload_date': '20240801',
|
||||||
|
'uploader': 'Financialounge',
|
||||||
|
'uploader_id': 'x2vtgmm',
|
||||||
|
'age_limit': 0,
|
||||||
|
'tags': [],
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x7wdsj',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_COMMON_MEDIA_FIELDS = '''description
|
_COMMON_MEDIA_FIELDS = '''description
|
||||||
geoblockedCountries {
|
geoblockedCountries {
|
||||||
|
@ -235,12 +266,21 @@ def _extract_embed_urls(cls, url, webpage):
|
||||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
r'(?s)<script\s+?[^>]*?\bsrc=(["\'])((?:https?:)?//[^>]+\.dailymotion\.com/player/(?:(?!\1).)+)\1[^>]*?>', webpage):
|
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||||
attrs = extract_attributes(mobj.group(0))
|
attrs = extract_attributes(mobj.group(0))
|
||||||
player_url = url_or_none(attrs.get('src'))
|
player_url = url_or_none(attrs.get('src'))
|
||||||
video_id = attrs.get('data-video')
|
if not player_url:
|
||||||
if all((player_url, video_id)):
|
continue
|
||||||
yield f'{player_url.replace(".js", ".html")}?video={video_id}'
|
player_url = player_url.replace('.js', '.html')
|
||||||
|
if player_url.startswith('//'):
|
||||||
|
player_url = f'https:{player_url}'
|
||||||
|
if video_id := attrs.get('data-video'):
|
||||||
|
query_string = f'video={video_id}'
|
||||||
|
elif playlist_id := attrs.get('data-playlist'):
|
||||||
|
query_string = f'playlist={playlist_id}'
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
yield update_url(player_url, query=query_string)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
|
|
Loading…
Reference in a new issue