From a403dcf9be20b49cbb3017328f4aaa352fb6d685 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 4 Nov 2024 07:02:48 +0800 Subject: [PATCH] [ie/Dailymotion] Improve embed extraction (#10843) Closes #8848, Closes #9432 Authored by: pzhlkj6612, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/dailymotion.py | 115 ++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index 632335e5b0..4e99fdda7a 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -10,11 +10,14 @@ OnDemandPagedList, age_restricted, clean_html, + extract_attributes, int_or_none, traverse_obj, try_get, unescapeHTML, unsmuggle_url, + update_url, + url_or_none, urlencode_postdata, ) @@ -99,11 +102,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): _VALID_URL = r'''(?ix) https?:// (?: - (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)| - (?:www\.)?lequipe\.fr/video + (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}| + (?:www\.)?lequipe\.fr + )/ + (?: + swf/(?!video)| + (?:(?:crawler|embed|swf)/)?video/| + player(?:/[\da-z]+)?\.html\?(?:video|(?Pplaylist))= ) - [/=](?P[^/?_&]+)(?:.+?\bplaylist=(?Px[0-9a-z]+))? - ''' + (?P[^/?_&#]+)(?:[\w-]*\?playlist=(?Px[0-9a-z]+))? + ''' IE_NAME = 'dailymotion' _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'] _TESTS = [{ @@ -217,6 +225,63 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video', 'only_matching': True, + }, { # playlist-only + 'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj', + 'only_matching': True, + }, { + 'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi', + 'only_matching': True, + }, { + 'url': 'https://www.dailymotion.com/crawler/video/x8u4owg', + 'only_matching': True, + }, { + 'url': 'https://www.dailymotion.com/embed/video/x8u4owg', + 'only_matching': True, + }] + _WEBPAGE_TESTS = [{ + # https://geo.dailymotion.com/player/xmyye.html?video=x93blhi + 'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/', + 'info_dict': { + 'id': 'x93blhi', + 'ext': 'mp4', + 'title': 'OnAir - 01/08/24', + 'description': '', + 'duration': 217, + 'timestamp': 1722505658, + 'upload_date': '20240801', + 'uploader': 'Financialounge', + 'uploader_id': 'x2vtgmm', + 'age_limit': 0, + 'tags': [], + 'view_count': int, + 'like_count': int, + }, + }, { + # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj + 'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/', + 'info_dict': { + 'id': 'x7wdsj', + }, + 'playlist_mincount': 50, + }, { + # https://www.dailymotion.com/crawler/video/x8u4owg + 'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php', + 'info_dict': { + 'id': 'x8u4owg', + 'ext': 'mp4', + 'like_count': int, + 'uploader': 'Le Parisien', + 'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg', + 'upload_date': '20240309', + 'view_count': int, + 'timestamp': 1709997866, + 'age_limit': 0, + 'uploader_id': 'x32f7b', + 'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes', + 'duration': 428.0, + 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', + 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], + }, }] _GEO_BYPASS = False _COMMON_MEDIA_FIELDS = '''description @@ -232,16 +297,35 @@ def _extract_embed_urls(cls, url, webpage): for mobj in re.finditer( r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id') + for mobj in re.finditer( + r'(?s)