Date: Sat, 11 May 2024 12:38:41 -0400
Subject: [PATCH 091/251] [ie/nfb] Fix extractors (#9650)
Authored by: rrgomes
---
yt_dlp/extractor/nfb.py | 27 ++++++++++-----------------
1 file changed, 10 insertions(+), 17 deletions(-)
diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py
index 6f78728253..968c9728b0 100644
--- a/yt_dlp/extractor/nfb.py
+++ b/yt_dlp/extractor/nfb.py
@@ -5,7 +5,6 @@
merge_dicts,
parse_count,
url_or_none,
- urljoin,
)
from ..utils.traversal import traverse_obj
@@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
- r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
- contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+ r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
@@ -224,18 +222,14 @@ def _real_extract(self, url):
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
- embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
- r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
- video_id = self._match_id(embed_url) # embed url has unique slug
- player = self._download_webpage(embed_url, video_id, 'Downloading player page')
- if 'MESSAGE_GEOBLOCKED' in player:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ player_data = self._search_json(
+ r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
+ video_id = self._match_id(player_data['overlay']['url']) # overlay url always has unique slug
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
- video_id, 'mp4', m3u8_id='hls')
+ player_data['source'], video_id, 'mp4', m3u8_id='hls')
- if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+ if dv_source := url_or_none(player_data.get('dvSource')):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
@@ -246,17 +240,16 @@ def _real_extract(self, url):
info = {
'id': video_id,
'title': self._html_search_regex(
- r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+ r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
webpage, 'title', default=None),
'description': self._html_search_regex(
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None),
- 'thumbnail': self._html_search_regex(
- r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+ 'thumbnail': url_or_none(player_data.get('poster')),
'uploader': self._html_search_regex(
- r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+ r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex(
- r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+ r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
From fc2879ecb05aaad36869609d154e4321362c1f63 Mon Sep 17 00:00:00 2001
From: Hugo Azevedo
Date: Sat, 11 May 2024 09:54:29 -0700
Subject: [PATCH 092/251] [ie/alura] Fix extractor (#9658)
Authored by: hugohaa
---
yt_dlp/extractor/alura.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index bfe066bc68..b785c62c32 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -39,7 +39,7 @@ class AluraIE(InfoExtractor):
def _real_extract(self, url):
- course, video_id = self._match_valid_url(url)
+ course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@@ -52,7 +52,7 @@ def _real_extract(self, url):
formats = []
for video_obj in video_dict:
- video_url_m3u8 = video_obj.get('link')
+ video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
From 31b417e1d1ccc67d5c027bf8878f483dc34cb118 Mon Sep 17 00:00:00 2001
From: llamasblade <69692580+llamasblade@users.noreply.github.com>
Date: Sat, 11 May 2024 17:01:56 +0000
Subject: [PATCH 093/251] [ie/hytale] Use `CloudflareStreamIE` explicitly
(#9672)
Authored by: llamasblade
---
yt_dlp/extractor/hytale.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py
index 0f4dcc309b..e8cd21a648 100644
--- a/yt_dlp/extractor/hytale.py
+++ b/yt_dlp/extractor/hytale.py
@@ -1,7 +1,8 @@
import re
+from .cloudflarestream import CloudflareStreamIE
from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils.traversal import traverse_obj
class HytaleIE(InfoExtractor):
@@ -49,7 +50,7 @@ def _real_extract(self, url):
entries = [
self.url_result(
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
- title=self._titles.get(video_hash), url_transparent=True)
+ CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
for video_hash in re.findall(
r'
Date: Sat, 11 May 2024 19:25:39 +0200
Subject: [PATCH 094/251] [cookies] Get chrome session cookies with
`--cookies-from-browser` (#9747)
Partially addresses #5534
Authored by: StefanLobbenmeier
---
yt_dlp/cookies.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 7b8d215f03..0de0672e12 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -347,6 +347,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None:
return is_encrypted, None
+ # In chrome, session cookies have expires_utc set to 0
+ # In our cookie-store, cookies that do not expire should have expires set to None
+ if not expires_utc:
+ expires_utc = None
+
return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
From 7e4259dff0b681a3f0e8a930799ce0394328c86e Mon Sep 17 00:00:00 2001
From: DaPotato69 <128940918+DaPotato69@users.noreply.github.com>
Date: Sun, 12 May 2024 07:11:40 +1000
Subject: [PATCH 095/251] Better warning when requested subs format not found
(#9873)
Closes #9760
Authored by: DaPotato69
---
yt_dlp/YoutubeDL.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index e0d58f0f49..2c6f695d09 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3071,7 +3071,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
- 'using %s' % (formats_query, lang, f['ext']))
+ 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
subs[lang] = f
return subs
From 800a43983e5fb719526ce4cb3956216085c63268 Mon Sep 17 00:00:00 2001
From: Eric Lam
Date: Sun, 12 May 2024 05:50:59 +0800
Subject: [PATCH 096/251] [ie/EuroParlWebstream] Support new URL format (#9647)
Authored by: voidful, seproDev
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
---
yt_dlp/extractor/europa.py | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
index 191a4361a2..29dfc8ae95 100644
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@@ -94,13 +94,14 @@ def get_item(type_, preference):
class EuroParlWebstreamIE(InfoExtractor):
_VALID_URL = r'''(?x)
- https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
- (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+ https?://multimedia\.europarl\.europa\.eu/
+ (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
'''
_TESTS = [{
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
'info_dict': {
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+ 'display_id': '20220914-0900-PLENARY',
'ext': 'mp4',
'title': 'Plenary session',
'release_timestamp': 1663139069,
@@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
'info_dict': {
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+ 'display_id': '20230301-1130-COMMITTEE-CULT',
'ext': 'mp4',
'release_date': '20230301',
'title': 'Committee on Culture and Education',
@@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
'live_status': 'is_live',
},
'skip': 'Not live anymore'
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
+ 'info_dict': {
+ 'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
+ 'display_id': '20240320-1345-SPECIAL-PRESSER',
+ 'ext': 'mp4',
+ 'release_date': '20240320',
+ 'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
+ 'release_timestamp': 1710939767,
+ }
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -166,6 +181,7 @@ def _real_extract(self, url):
return {
'id': json_info['id'],
+ 'display_id': display_id,
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
'formats': formats,
'subtitles': subtitles,
From 6db96268c521e945d42649607db1574f5d92e082 Mon Sep 17 00:00:00 2001
From: alard
Date: Sat, 11 May 2024 23:58:15 +0200
Subject: [PATCH 097/251] [ie/TV5Monde] Fix extractor (#9143)
Closes #9118
Authored by: alard, seproDev
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
---
yt_dlp/extractor/tv5mondeplus.py | 149 ++++++++++++++-----------------
1 file changed, 68 insertions(+), 81 deletions(-)
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index a445fae853..52ff230f2a 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -2,85 +2,88 @@
from .common import InfoExtractor
from ..utils import (
+ clean_html,
determine_ext,
extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
int_or_none,
- parse_duration,
- traverse_obj,
- try_get,
url_or_none,
)
+from ..utils.traversal import traverse_obj
class TV5MondePlusIE(InfoExtractor):
- IE_DESC = 'TV5MONDE+'
- _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ IE_NAME = 'TV5MONDE'
+ _VALID_URL = r'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
_TESTS = [{
- # movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
- 'md5': 'c86f60bf8b75436455b1b205f9745955',
+ # documentary
+ 'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
+ 'md5': 'd2a708902d3df230a357c99701aece05',
'info_dict': {
- 'id': 'ZX0ipMyFQq_6D4BA7b',
- 'display_id': 'les-novices',
+ 'id': '3FPa7JMu21_6D4BA7b',
+ 'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
'ext': 'mp4',
- 'title': 'Les novices',
- 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
- 'upload_date': '20230821',
- 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
- 'duration': 5177,
- 'episode': 'Les novices',
+ 'title': "Baudouin, l'héritage d'un roi",
+ 'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
+ 'duration': 4842,
+ 'upload_date': '20240130',
+ 'timestamp': 1706641242,
+ 'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
+ 'description': 'md5:78125c74a5cac06d7743a2d09126edad',
+ 'series': "Baudouin, l'héritage d'un roi",
},
}, {
# series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+ 'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
+ 'md5': 'f5e09637cadd55639c05874e22eb56bf',
'info_dict': {
- 'id': 'wJ0eeEPozr_6D4BA7b',
- 'display_id': 'opj-les-dents-de-la-terre-2',
+ 'id': 'obRRZ8m6g9_6D4BA7b',
+ 'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
'ext': 'mp4',
- 'title': "OPJ - Les dents de la Terre (2)",
- 'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
- 'upload_date': '20230823',
- 'series': 'OPJ',
- 'episode': 'Les dents de la Terre (2)',
- 'duration': 2877,
- 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+ 'title': 'Toute la vie',
+ 'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
+ 'duration': 2526,
+ 'upload_date': '20230721',
+ 'timestamp': 1689971646,
+ 'series': 'Toute la vie',
+ 'episode': 'Mardi 23 mars 2021',
},
}, {
# movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
- 'md5': '32fa0cde16a4480d1251502a66856d5f',
+ 'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
+ 'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
'info_dict': {
- 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
- 'display_id': 'ceux-qui-travaillent',
+ 'id': 'DOcfvdLKXL_6D4BA7b',
+ 'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
'ext': 'mp4',
- 'title': 'Ceux qui travaillent',
- 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
- 'upload_date': '20210819',
+ 'title': 'Ce fleuve qui nous charrie',
+ 'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
+ 'duration': 5300,
+ 'upload_date': '20210822',
+ 'timestamp': 1629594105,
+ 'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
+ 'series': 'Ce fleuve qui nous charrie',
},
- 'skip': 'no longer available',
}, {
- # series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+ # news
+ 'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
+ 'md5': 'c62977d6d10754a2ecebba70ad370479',
'info_dict': {
- 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
- 'display_id': 'vestiaires-caro-actrice',
+ 'id': 'LgQFrOCNsc_6D4BA7b',
+ 'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
'ext': 'mp4',
- 'title': "Vestiaires - Caro actrice",
- 'description': 'md5:db15d2e1976641e08377f942778058ea',
- 'upload_date': '20210819',
- 'series': "Vestiaires",
- 'episode': 'Caro actrice',
+ 'title': 'TV5MONDE, le journal',
+ 'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
+ 'duration': 854,
+ 'upload_date': '20240508',
+ 'timestamp': 1715159640,
+ 'series': 'TV5MONDE, le journal',
+ 'episode': 'EDITION DU 08/05/24 - 11H',
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'no longer available',
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
- 'only_matching': True,
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
- 'only_matching': True,
}]
_GEO_BYPASS = False
@@ -98,7 +101,6 @@ def _real_extract(self, url):
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
- title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
@@ -147,26 +149,7 @@ def process_video_files(v):
process_video_files(video_files)
metadata = self._parse_json(
- vpl_data['data-metadata'], display_id)
- duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
- or parse_duration(self._html_search_meta('duration', webpage)))
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
- 'description', fatal=False)
-
- series = self._html_search_regex(
- r']+class=["\']episode-emission[^>]+>([^<]+)', webpage,
- 'series', default=None)
-
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- upload_date = self._search_regex(
- r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
- webpage, 'upload date', default=None)
- if upload_date:
- upload_date = upload_date.replace('_', '')
+ vpl_data.get('data-metadata') or '{}', display_id, fatal=False)
if not video_id:
video_id = self._search_regex(
@@ -175,16 +158,20 @@ def process_video_files(v):
default=display_id)
return {
+ **traverse_obj(metadata, ('content', {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('series', {str}),
+ 'timestamp': ('publishDate_ts', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': vpl_data.get('data-image'),
- 'duration': duration,
- 'upload_date': upload_date,
+ 'title': clean_html(get_element_by_class('main-title', webpage)),
+ 'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
+ 'thumbnail': url_or_none(vpl_data.get('data-image')),
'formats': formats,
'subtitles': self._extract_subtitles(self._parse_json(
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
- 'series': series,
- 'episode': episode,
}
From cf212d0a331aba05c32117573f760cdf3af8c62f Mon Sep 17 00:00:00 2001
From: Haxy
Date: Sun, 12 May 2024 17:03:36 +0100
Subject: [PATCH 098/251] [ie/youtube] Add `mediaconnect` client (#9546)
Authored by: clienthax
---
README.md | 2 +-
yt_dlp/extractor/youtube.py | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 37da789cf6..e3257682b5 100644
--- a/README.md
+++ b/README.md
@@ -1760,7 +1760,7 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen`, `mediaconnect` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index e553fff9f1..4ce3e36001 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -240,6 +240,16 @@
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
},
+ # This client has pre-merged video+audio 720p/1080p streams
+ 'mediaconnect': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MEDIA_CONNECT_FRONTEND',
+ 'clientVersion': '0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 95
+ },
}
From 01395a34345d1c6ba1b73ca92f94dd200dc45341 Mon Sep 17 00:00:00 2001
From: sepro <4618135+seproDev@users.noreply.github.com>
Date: Sun, 12 May 2024 22:12:11 +0200
Subject: [PATCH 099/251] [cleanup] Remove questionable extractors (#9911)
Closes #6279, Closes #6799
Authored by: seproDev
---
yt_dlp/extractor/_extractors.py | 10 --
yt_dlp/extractor/cableav.py | 32 ------
yt_dlp/extractor/einthusan.py | 105 -----------------
yt_dlp/extractor/jable.py | 103 -----------------
yt_dlp/extractor/porn91.py | 95 ---------------
yt_dlp/extractor/unsupported.py | 14 +++
yt_dlp/extractor/xfileshare.py | 198 --------------------------------
yt_dlp/extractor/yourporn.py | 65 -----------
yt_dlp/extractor/yourupload.py | 43 -------
9 files changed, 14 insertions(+), 651 deletions(-)
delete mode 100644 yt_dlp/extractor/cableav.py
delete mode 100644 yt_dlp/extractor/einthusan.py
delete mode 100644 yt_dlp/extractor/jable.py
delete mode 100644 yt_dlp/extractor/porn91.py
delete mode 100644 yt_dlp/extractor/xfileshare.py
delete mode 100644 yt_dlp/extractor/yourporn.py
delete mode 100644 yt_dlp/extractor/yourupload.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 1f095c932a..cf408b6828 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -288,7 +288,6 @@
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
-from .cableav import CableAVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
@@ -548,7 +547,6 @@
EggheadLessonIE,
)
from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
@@ -861,10 +859,6 @@
)
from .ixigua import IxiguaIE
from .izlesene import IzleseneIE
-from .jable import (
- JableIE,
- JablePlaylistIE,
-)
from .jamendo import (
JamendoIE,
JamendoAlbumIE,
@@ -1499,7 +1493,6 @@
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .pornflip import PornFlipIE
from .pornhub import (
@@ -2377,7 +2370,6 @@
)
from .xanimu import XanimuIE
from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
@@ -2432,8 +2424,6 @@
YouNowMomentIE,
)
from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
from .zaiko import (
ZaikoIE,
ZaikoETicketIE,
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
deleted file mode 100644
index 4a221414ea..0000000000
--- a/yt_dlp/extractor/cableav.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from .common import InfoExtractor
-
-
-class CableAVIE(InfoExtractor):
- _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
- _TESTS = [{
- 'url': 'https://cableav.tv/lS4iR9lWjN8/',
- 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
- 'info_dict': {
- 'id': 'lS4iR9lWjN8',
- 'ext': 'mp4',
- 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
- 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._og_search_video_url(webpage, secure=False)
-
- formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- }
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
deleted file mode 100644
index 53bc2535d0..0000000000
--- a/yt_dlp/extractor/einthusan.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- extract_attributes,
- ExtractorError,
- get_elements_by_class,
- urlencode_postdata,
-)
-
-
-class EinthusanIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://einthusan.tv/movie/watch/9097/',
- 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
- 'info_dict': {
- 'id': '9097',
- 'ext': 'mp4',
- 'title': 'Ae Dil Hai Mushkil',
- 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.com/movie/watch/9097/',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
- 'only_matching': True,
- }]
-
- # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
- def _decrypt(self, encrypted_data, video_id):
- return self._parse_json(compat_b64decode((
- encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
- )).decode('utf-8'), video_id)
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- host = mobj.group('host')
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
-
- player_params = extract_attributes(self._search_regex(
- r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
-
- page_id = self._html_search_regex(
- '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
- video_data = self._download_json(
- 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
- data=urlencode_postdata({
- 'xEvent': 'UIVideoPlayer.PingOutcome',
- 'xJson': json.dumps({
- 'EJOutcomes': player_params['data-ejpingables'],
- 'NativeHLS': False
- }),
- 'arcVersion': 3,
- 'appVersion': 59,
- 'gorilla.csrf.Token': page_id,
- }))['Data']
-
- if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
- raise ExtractorError(
- 'Download rate reached. Please try again later.', expected=True)
-
- ej_links = self._decrypt(video_data['EJLinks'], video_id)
-
- formats = []
-
- m3u8_url = ej_links.get('HLSLink')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
-
- mp4_url = ej_links.get('MP4Link')
- if mp4_url:
- formats.append({
- 'url': mp4_url,
- })
-
- description = get_elements_by_class('synopsis', webpage)[0]
- thumbnail = self._html_search_regex(
- r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
- webpage, 'thumbnail url', fatal=False, group='url')
- if thumbnail is not None:
- thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
- }
diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py
deleted file mode 100644
index 71fed49ea0..0000000000
--- a/yt_dlp/extractor/jable.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- InAdvancePagedList,
- int_or_none,
- orderedSet,
- unified_strdate,
-)
-
-
-class JableIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/videos/pppd-812/',
- 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
- 'info_dict': {
- 'id': 'pppd-812',
- 'ext': 'mp4',
- 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
- 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- },
- }, {
- 'url': 'https://jable.tv/videos/apak-220/',
- 'md5': '71f9239d69ced58ab74a816908847cc1',
- 'info_dict': {
- 'id': 'apak-220',
- 'ext': 'mp4',
- 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- 'upload_date': '20220319',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- formats = self._extract_m3u8_formats(
- self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage, default=''),
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- 'formats': formats,
- 'age_limit': 18,
- 'upload_date': unified_strdate(self._search_regex(
- r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
- 'view_count': int_or_none(self._search_regex(
- r'#icon-eye">