mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-09-28 13:47:53 +00:00
Merge branch 'yt-dlp:master' into rls/arm-ubuntu-bump
This commit is contained in:
commit
23ad448446
|
@ -192,8 +192,8 @@ def test_raise_http_error(self, handler, status):
|
|||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
@pytest.mark.parametrize('params,extensions', [
|
||||
({'timeout': 0.00001}, {}),
|
||||
({}, {'timeout': 0.00001}),
|
||||
({'timeout': sys.float_info.min}, {}),
|
||||
({}, {'timeout': sys.float_info.min}),
|
||||
])
|
||||
def test_timeout(self, handler, params, extensions):
|
||||
with handler(**params) as rh:
|
||||
|
|
|
@ -690,7 +690,6 @@ def process_color_policy(stream):
|
|||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||
self.params['http_headers'].pop('Cookie', None)
|
||||
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||
|
||||
if auto_init and auto_init != 'no_verbose_header':
|
||||
self.print_debug_header()
|
||||
|
@ -964,6 +963,7 @@ def __exit__(self, *args):
|
|||
def close(self):
    """Save cookies and release the request director, if one was created.

    ``_request_director`` is a ``functools.cached_property``: the director
    is only built on first access and is then stored in the instance
    ``__dict__``.  Reading the attribute unconditionally here would build a
    fresh director (instantiating request handlers) merely to close it
    again, and would repeat that on every further ``close()`` call.
    """
    self.save_cookies()
    # Only tear down a director that actually exists; deleting the cached
    # value lets a later access lazily build a new one.
    if '_request_director' in self.__dict__:
        self._request_director.close()
        del self._request_director
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
|
@ -4160,6 +4160,10 @@ def build_request_director(self, handlers, preferences=None):
|
|||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||
return director
|
||||
|
||||
@functools.cached_property
def _request_director(self):
    """Request director for this instance, built lazily on first access.

    The director is constructed from every registered request handler and
    the registered handler preferences; the result is cached on the
    instance by ``functools.cached_property``.
    """
    handlers = _REQUEST_HANDLERS.values()
    return self.build_request_director(handlers, _RH_PREFERENCES)
|
||||
|
||||
def encode(self, s):
|
||||
if isinstance(s, bytes):
|
||||
return s # Already encoded
|
||||
|
|
|
@ -444,6 +444,7 @@
|
|||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionSearchIE,
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .dailywire import (
|
||||
|
@ -2499,6 +2500,7 @@
|
|||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zenporn import ZenPornIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
|
|
|
@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
|
|||
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
|
||||
'display_id': 'k0srjLSkga8.webm',
|
||||
'release_date': '20180403',
|
||||
'creator': 'Virginie Vota',
|
||||
'creators': ['Virginie Vota'],
|
||||
'release_year': 2018,
|
||||
'upload_date': '20230318',
|
||||
'uploader': 'admin@altcensored.com',
|
||||
|
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
|
|||
'duration': 926.09,
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
'categories': ['News & Politics'], # FIXME
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
|
|||
'title': 'Virginie Vota',
|
||||
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
|
||||
},
|
||||
'playlist_count': 91
|
||||
'playlist_count': 85,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
'info_dict': {
|
||||
'title': 'yukikaze775',
|
||||
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
},
|
||||
'playlist_count': 4
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
|
||||
'info_dict': {
|
||||
'title': 'Mister Metokur',
|
||||
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
|
||||
},
|
||||
'playlist_count': 121,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -78,7 +85,7 @@ def _real_extract(self, url):
|
|||
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
|
||||
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
|
||||
page_count = int_or_none(self._html_search_regex(
|
||||
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
|
||||
r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
|
||||
webpage, 'page count', default='1'))
|
||||
|
||||
def page_func(page_num):
|
||||
|
|
|
@ -300,7 +300,7 @@ def _real_extract(self, url):
|
|||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
|
|
|
@ -1996,7 +1996,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
|||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
|
|
|
@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# videoCenterId: "id"
|
||||
'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
|
||||
'info_dict': {
|
||||
'id': '5c846c0518444308ba32c4159df3b3e0',
|
||||
'ext': 'mp4',
|
||||
'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
|
||||
'uploader': 'yangjuan',
|
||||
'timestamp': 1708554940,
|
||||
'upload_date': '20240221',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# var ids = ["id"]
|
||||
'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
|
||||
|
@ -128,7 +142,7 @@ def _real_extract(self, url):
|
|||
|
||||
video_id = self._search_regex(
|
||||
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
|
||||
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
|
||||
r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
|
||||
r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
|
||||
r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
|
||||
r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
|
@ -40,7 +40,7 @@ def _real_extract(self, url):
|
|||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
raise ExtractorError('The channel is not currently live', expected=True)
|
||||
raise UserNotLive(video_id=channel_id)
|
||||
|
||||
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
|
||||
|
||||
|
|
|
@ -4,27 +4,25 @@
|
|||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:watch\.)?%s/|
|
||||
%s
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||
'info_dict': {
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
|
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
'info_dict': {
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import functools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
|
@ -44,36 +45,41 @@ def _real_initialize(self):
|
|||
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
|
||||
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
|
||||
|
||||
def _get_token(self, xid):
    """Return an API bearer token, fetching and caching one if needed.

    A token already stored in the Dailymotion cookies is reused
    (``access_token`` is tried before ``client_token``).  Otherwise a token
    is requested from the OAuth endpoint, using the password grant when
    login credentials are available and the client-credentials grant when
    they are not, and the result is stored back as a cookie.

    ``xid`` is only used as the video id when parsing the error body of a
    rejected (HTTP 400) token request.

    Raises ExtractorError (expected) with the server's error description
    when the token request is rejected with HTTP 400; any other
    ExtractorError is re-raised unchanged.
    """
    jar = self._get_dailymotion_cookies()
    for cookie_name in ('access_token', 'client_token'):
        cached = self._get_cookie_value(jar, cookie_name)
        if cached:
            return cached

    payload = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
    }
    username, password = self._get_login_info()
    if username:
        # Logged-in flow: the resulting token is tied to the user account
        cookie_name = 'access_token'
        payload['grant_type'] = 'password'
        payload['password'] = password
        payload['username'] = username
    else:
        cookie_name = 'client_token'
        payload['grant_type'] = 'client_credentials'

    try:
        response = self._download_json(
            'https://graphql.api.dailymotion.com/oauth/token',
            None, 'Downloading Access Token',
            data=urlencode_postdata(payload))
        token = response['access_token']
    except ExtractorError as e:
        cause = e.cause
        if not (isinstance(cause, HTTPError) and cause.status == 400):
            raise
        # A 400 response carries a JSON body describing why the grant failed
        error_body = cause.response.read().decode()
        raise ExtractorError(
            self._parse_json(error_body, xid)['error_description'], expected=True)

    self._set_dailymotion_cookie(cookie_name, token)
    return token
|
||||
|
||||
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
|
||||
if not self._HEADERS.get('Authorization'):
|
||||
cookies = self._get_dailymotion_cookies()
|
||||
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
|
||||
if not token:
|
||||
data = {
|
||||
'client_id': 'f1a362d288c1b98099c7',
|
||||
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
|
||||
}
|
||||
username, password = self._get_login_info()
|
||||
if username:
|
||||
data.update({
|
||||
'grant_type': 'password',
|
||||
'password': password,
|
||||
'username': username,
|
||||
})
|
||||
else:
|
||||
data['grant_type'] = 'client_credentials'
|
||||
try:
|
||||
token = self._download_json(
|
||||
'https://graphql.api.dailymotion.com/oauth/token',
|
||||
None, 'Downloading Access Token',
|
||||
data=urlencode_postdata(data))['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.response.read().decode(), xid)['error_description'], expected=True)
|
||||
raise
|
||||
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||
self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'
|
||||
|
||||
resp = self._download_json(
|
||||
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
|
||||
|
@ -393,9 +399,55 @@ def _extract_embed_urls(cls, url, webpage):
|
|||
yield '//dailymotion.com/playlist/%s' % p
|
||||
|
||||
|
||||
class DailymotionSearchIE(DailymotionPlaylistBaseIE):
    """Extractor for Dailymotion video search result pages.

    The search term is taken from the URL and the results are exposed as a
    lazily paged playlist whose entries point at individual video pages.
    """
    IE_NAME = 'dailymotion:search'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
    # Results requested per API call.  The same value is given to
    # OnDemandPagedList, so the request limit must always be derived from it.
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
        'info_dict': {
            'id': 'king of turtles',
            'title': 'king of turtles',
        },
        'playlist_mincount': 90,
    }]
    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '

    def _call_search_api(self, term, page, note):
        """Run the search query for one page and return the ``search`` object.

        term -- search string sent as the GraphQL ``query`` variable
        page -- page number sent to the API (callers pass 1-based values)
        note -- progress message shown while downloading

        Raises ExtractorError with the first API error message (or a
        generic message) when the response has no ``data.search`` dict.
        """
        if not self._HEADERS.get('Authorization'):
            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
                'operationName': 'SEARCH_QUERY',
                'query': self._SEARCH_QUERY,
                'variables': {
                    # Keep in sync with the page size used for pagination
                    # instead of repeating the literal value
                    'limit': self._PAGE_SIZE,
                    'page': page,
                    'query': term,
                },
            }).encode(), headers=self._HEADERS)
        obj = traverse_obj(resp, ('data', 'search', {dict}))
        if not obj:
            raise ExtractorError(
                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')

        return obj

    def _fetch_page(self, term, page):
        """Yield url results for one page of hits; ``page`` is 0-based."""
        page += 1  # the paged list counts from 0, the request uses page + 1
        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)

    def _real_extract(self, url):
        """Build a lazily paged playlist of search hits for the URL's term."""
        term = urllib.parse.unquote_plus(self._match_id(url))
        return self.playlist_result(
            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
|
|
@ -8,9 +8,9 @@
|
|||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
|
||||
/(?:mediabase|embed|item)/|
|
||||
(?:/toppers|/latest|/?)\?selectedId=
|
||||
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
|
||||
(?:mediabase|embed|item)/|
|
||||
[^#]*[?&]selectedId=
|
||||
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||
|
@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -1,60 +1,49 @@
|
|||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _make_url_result(self, video_or_full_id, catalog=None):
|
||||
full_id = 'francetv:%s' % video_or_full_id
|
||||
if '@' not in video_or_full_id and catalog:
|
||||
full_id += '@%s' % catalog
|
||||
return self.url_result(
|
||||
full_id, ie=FranceTVIE.ie_key(),
|
||||
video_id=video_or_full_id.split('@')[0])
|
||||
def _make_url_result(self, video_id, url=None):
|
||||
video_id = video_id.split('@')[0] # for compat with old @catalog IDs
|
||||
full_id = f'francetv:{video_id}'
|
||||
if url:
|
||||
full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
|
||||
return self.url_result(full_id, FranceTVIE, video_id)
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
|
||||
.*?\bidDiffusion=[^&]+|
|
||||
(?:
|
||||
https?://videos\.francetv\.fr/video/|
|
||||
francetv:
|
||||
)
|
||||
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
|
||||
)
|
||||
'''
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
|
||||
_VALID_URL = r'francetv:(?P<id>[^@#]+)'
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
# without catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
|
||||
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
|
||||
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'info_dict': {
|
||||
'id': '162311093',
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
}, {
|
||||
# with catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
|
@ -76,10 +65,7 @@ class FranceTVIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video(self, video_id, catalogue=None):
|
||||
# Videos are identified by idDiffusion so catalogue part is optional.
|
||||
# However when provided, some extra formats may be returned so we pass
|
||||
# it if available.
|
||||
def _extract_video(self, video_id, hostname=None):
|
||||
is_live = None
|
||||
videos = []
|
||||
title = None
|
||||
|
@ -91,18 +77,20 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
timestamp = None
|
||||
spritesheets = None
|
||||
|
||||
for device_type in ('desktop', 'mobile'):
|
||||
# desktop+chrome returns dash; mobile+safari returns hls
|
||||
for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
|
||||
dinfo = self._download_json(
|
||||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||
video_id, 'Downloading %s video JSON' % device_type, query={
|
||||
f'https://k7.ftven.fr/videos/{video_id}', video_id,
|
||||
f'Downloading {device_type} {browser} video JSON', query=filter_dict({
|
||||
'device_type': device_type,
|
||||
'browser': 'chrome',
|
||||
}, fatal=False)
|
||||
'browser': browser,
|
||||
'domain': hostname,
|
||||
}), fatal=False)
|
||||
|
||||
if not dinfo:
|
||||
continue
|
||||
|
||||
video = dinfo.get('video')
|
||||
video = traverse_obj(dinfo, ('video', {dict}))
|
||||
if video:
|
||||
videos.append(video)
|
||||
if duration is None:
|
||||
|
@ -112,7 +100,7 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
if spritesheets is None:
|
||||
spritesheets = video.get('spritesheets')
|
||||
|
||||
meta = dinfo.get('meta')
|
||||
meta = traverse_obj(dinfo, ('meta', {dict}))
|
||||
if meta:
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
|
@ -126,43 +114,47 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
if timestamp is None:
|
||||
timestamp = parse_iso8601(meta.get('broadcasted_at'))
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for video in videos:
|
||||
formats, subtitles, video_url = [], {}, None
|
||||
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
|
||||
video_url = video['url']
|
||||
format_id = video.get('format')
|
||||
|
||||
video_url = None
|
||||
if video.get('workflow') == 'token-akamai':
|
||||
token_url = video.get('token')
|
||||
if token_url:
|
||||
token_json = self._download_json(
|
||||
token_url, video_id,
|
||||
'Downloading signed %s manifest URL' % format_id)
|
||||
if token_json:
|
||||
video_url = token_json.get('url')
|
||||
if not video_url:
|
||||
video_url = video.get('url')
|
||||
token_url = url_or_none(video.get('token'))
|
||||
if token_url and video.get('workflow') == 'token-akamai':
|
||||
tokenized_url = traverse_obj(self._download_json(
|
||||
token_url, video_id, f'Downloading signed {format_id} manifest URL',
|
||||
fatal=False, query={
|
||||
'format': 'json',
|
||||
'url': video_url,
|
||||
}), ('url', {url_or_none}))
|
||||
if tokenized_url:
|
||||
video_url = tokenized_url
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||
video_url, video_id, f4m_id=format_id or ext, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
format_id = format_id or 'hls'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False)
|
||||
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
|
||||
if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
|
||||
f.update({
|
||||
'tbr': int_or_none(mobj.group('bitrate')),
|
||||
'acodec': 'mp4a',
|
||||
})
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
video_url, video_id, mpd_id=format_id, fatal=False)
|
||||
video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'format_id': join_nonempty('rtmp', format_id),
|
||||
'ext': 'flv',
|
||||
})
|
||||
else:
|
||||
|
@ -174,6 +166,13 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
|
||||
# XXX: what is video['captions']?
|
||||
|
||||
if not formats and video_url:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(video_url), video_id, 'Checking for geo-restriction',
|
||||
fatal=False, expected_status=403)
|
||||
if urlh and urlh.headers.get('x-errortype') == 'geo':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
|
||||
f['language_preference'] = -10
|
||||
|
@ -194,7 +193,7 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
# a 10×10 grid of thumbnails corresponding to approximately
|
||||
# 2 seconds of the video; the last spritesheet may be shorter
|
||||
'duration': 200,
|
||||
} for sheet in spritesheets]
|
||||
} for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
|
||||
})
|
||||
|
||||
return {
|
||||
|
@ -210,21 +209,15 @@ def _extract_video(self, video_id, catalogue=None):
|
|||
'series': title if episode_number else None,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'season_number': int_or_none(season_number),
|
||||
'_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
catalog = mobj.group('catalog')
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
hostname = smuggled_data.get('hostname') or 'www.france.tv'
|
||||
|
||||
if not video_id:
|
||||
qs = parse_qs(url)
|
||||
video_id = qs.get('idDiffusion', [None])[0]
|
||||
catalog = qs.get('catalogue', [None])[0]
|
||||
if not video_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
return self._extract_video(video_id, catalog)
|
||||
return self._extract_video(video_id, hostname=hostname)
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
|
@ -246,6 +239,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
# geo-restricted
|
||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||
'info_dict': {
|
||||
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
|
||||
|
@ -304,17 +298,16 @@ def _real_extract(self, url):
|
|||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
if not video_id:
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
video_id = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
|
||||
webpage, 'video ID')
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
||||
|
||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
@ -328,8 +321,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Soir 3',
|
||||
'upload_date': '20190822',
|
||||
'timestamp': 1566510900,
|
||||
'description': 'md5:72d167097237701d6e8452ff03b83c00',
|
||||
'timestamp': 1566510730,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 1637,
|
||||
'subtitles': {
|
||||
'fr': 'mincount:2',
|
||||
},
|
||||
|
@ -344,8 +338,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
|
||||
'ext': 'mp4',
|
||||
'title': 'Covid-19 : une situation catastrophique à New Dehli',
|
||||
'thumbnail': str,
|
||||
'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 76,
|
||||
'timestamp': 1619028518,
|
||||
'upload_date': '20210421',
|
||||
|
@ -371,11 +365,17 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'id': 'x4iiko0',
|
||||
'ext': 'mp4',
|
||||
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
|
||||
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
|
||||
'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
|
||||
'timestamp': 1467011958,
|
||||
'upload_date': '20160627',
|
||||
'uploader': 'France Inter',
|
||||
'uploader_id': 'x2q2ez',
|
||||
'upload_date': '20160627',
|
||||
'view_count': int,
|
||||
'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
|
||||
'age_limit': 0,
|
||||
'duration': 640,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}, {
|
||||
|
@ -405,4 +405,4 @@ def _real_extract(self, url):
|
|||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from .francetv import FranceTVIE
|
||||
from .francetv import FranceTVBaseInfoExtractor
|
||||
|
||||
|
||||
class LumniIE(InfoExtractor):
|
||||
class LumniIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
|
||||
|
@ -21,4 +20,4 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
|
||||
return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
|
|
@ -13,13 +13,11 @@
|
|||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
|
@ -38,6 +36,8 @@
|
|||
class NiconicoIE(InfoExtractor):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
|
@ -55,25 +55,31 @@ class NiconicoIE(InfoExtractor):
|
|||
'duration': 33,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'genres': ['未設定'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||
'info_dict': {
|
||||
'id': 'nm14296458',
|
||||
'ext': 'swf',
|
||||
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
|
||||
'ext': 'mp4',
|
||||
'title': '【Kagamine Rin】Dance on media【Original】take2!',
|
||||
'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
|
||||
'thumbnail': r're:https?://.*',
|
||||
'uploader': 'りょうた',
|
||||
'uploader_id': '18822557',
|
||||
'upload_date': '20110429',
|
||||
'timestamp': 1304065916,
|
||||
'duration': 209,
|
||||
'duration': 208.0,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'genres': ['音楽・サウンド'],
|
||||
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
|
@ -107,22 +113,24 @@ class NiconicoIE(InfoExtractor):
|
|||
}, {
|
||||
# video not available via `getflv`; "old" HTML5 video
|
||||
'url': 'http://www.nicovideo.jp/watch/sm1151009',
|
||||
'md5': '8fa81c364eb619d4085354eab075598a',
|
||||
'md5': 'f95a3d259172667b293530cc2e41ebda',
|
||||
'info_dict': {
|
||||
'id': 'sm1151009',
|
||||
'ext': 'mp4',
|
||||
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
|
||||
'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
|
||||
'description': 'md5:f95a3d259172667b293530cc2e41ebda',
|
||||
'thumbnail': r're:https?://.*',
|
||||
'duration': 184,
|
||||
'timestamp': 1190868283,
|
||||
'upload_date': '20070927',
|
||||
'timestamp': 1190835883,
|
||||
'upload_date': '20070926',
|
||||
'uploader': 'denden2',
|
||||
'uploader_id': '1392194',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'genres': ['ゲーム'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# "New" HTML5 video
|
||||
# md5 is unstable
|
||||
|
@ -132,16 +140,18 @@ class NiconicoIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
|
||||
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
|
||||
'timestamp': 1498514060,
|
||||
'timestamp': 1498481660,
|
||||
'upload_date': '20170626',
|
||||
'uploader': 'ゲスト',
|
||||
'uploader': 'no-namamae',
|
||||
'uploader_id': '40826363',
|
||||
'thumbnail': r're:https?://.*',
|
||||
'duration': 198,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'genres': ['アニメ'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# Video without owner
|
||||
'url': 'http://www.nicovideo.jp/watch/sm18238488',
|
||||
|
@ -151,7 +161,7 @@ class NiconicoIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': '【実写版】ミュータントタートルズ',
|
||||
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
|
||||
'timestamp': 1341160408,
|
||||
'timestamp': 1341128008,
|
||||
'upload_date': '20120701',
|
||||
'uploader': None,
|
||||
'uploader_id': None,
|
||||
|
@ -159,8 +169,10 @@ class NiconicoIE(InfoExtractor):
|
|||
'duration': 5271,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'genres': ['エンターテイメント'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
|
@ -353,15 +365,10 @@ def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dm
|
|||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||
return None
|
||||
|
||||
def extract_video_quality(video_quality):
|
||||
return parse_filesize('%sB' % self._search_regex(
|
||||
r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default=''))
|
||||
|
||||
format_id = '-'.join(
|
||||
[remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
|
||||
|
||||
vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
|
||||
vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))
|
||||
|
||||
return {
|
||||
'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
|
||||
|
@ -370,10 +377,15 @@ def extract_video_quality(video_quality):
|
|||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||
'acodec': 'aac',
|
||||
'vcodec': 'h264',
|
||||
'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000),
|
||||
'vbr': float_or_none(vid_quality if vid_quality > 0 else extract_video_quality(vid_qual_label), 1000),
|
||||
'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')),
|
||||
'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
|
||||
**traverse_obj(audio_quality, ('metadata', {
|
||||
'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(video_quality, ('metadata', {
|
||||
'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
|
||||
'height': ('resolution', 'height', {int_or_none}),
|
||||
'width': ('resolution', 'width', {int_or_none}),
|
||||
})),
|
||||
'quality': -2 if 'low' in video_quality['id'] else None,
|
||||
'protocol': 'niconico_dmc',
|
||||
'expected_protocol': dmc_protocol, # XXX: This is not a documented field
|
||||
|
@ -383,6 +395,63 @@ def extract_video_quality(video_quality):
|
|||
}
|
||||
}
|
||||
|
||||
def _yield_dmc_formats(self, api_data, video_id):
    """Yield format dicts built from the ``media.delivery.movie`` API data.

    Every (audio, video, protocol) combination listed in the API data is
    passed to ``_extract_format_for_quality()``; only truthy results are
    yielded. Nothing is yielded when the audio list, the video list or the
    session protocol list is missing or empty.

    @param api_data  Parsed API data for the video
    @param video_id  ID of the video, forwarded to the format builder
    """
    dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
    audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
    videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
    protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
    if not all((audios, videos, protocols)):
        return

    for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
        # A falsy return value means this combination must be skipped
        if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
            yield fmt
|
||||
|
||||
def _yield_dms_formats(self, api_data, video_id):
    """Yield HLS format dicts built from the ``media.domand`` API data.

    Requests an HLS manifest URL covering every available video/audio
    pairing, extracts its formats, then yields the audio-only formats
    (enriched with bitrate/sample-rate from the API data) followed by the
    de-duplicated video formats. Nothing is yielded when the available
    videos, available audios, access-right key or watch track ID is missing.

    @param api_data  Parsed API data for the video
    @param video_id  ID of the video
    """
    fmt_filter = lambda _, v: v['isAvailable'] and v['id']
    videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
    audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
    access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
    track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
    if not all((videos, audios, access_key, track_id)):
        return

    dms_m3u8_url = self._download_json(
        f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
        data=json.dumps({
            'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios)))
        }).encode(), query={'actionTrackId': track_id}, headers={
            'x-access-right-key': access_key,
            'x-frontend-id': 6,
            'x-frontend-version': 0,
            'x-request-with': 'https://www.nicovideo.jp',
        })['data']['contentUrl']
    # Getting all audio formats results in duplicate video formats which we filter out later
    dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)

    # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
    for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
        # The lookup may come back empty (None) when no API audio entry matches
        # this format_id; fall back to an empty mapping so unpacking cannot fail
        audio_meta = traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
            'format_id': ('id', {str}),
            'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
            'asr': ('samplingRate', {int_or_none}),
        }), get_all=False) or {}
        yield {
            **audio_fmt,
            **audio_meta,
            'acodec': 'aac',
            'ext': 'm4a',
        }

    # Sort before removing dupes to keep the format dicts with the lowest tbr
    video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
    self._remove_duplicate_formats(video_fmts)
    # Calculate the true vbr/tbr by subtracting the lowest abr
    min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
    for video_fmt in video_fmts:
        video_fmt['tbr'] -= min_abr
        video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
        yield video_fmt
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
@ -409,19 +478,29 @@ def _real_extract(self, url):
|
|||
webpage, 'error reason', default=None)
|
||||
if not error_msg:
|
||||
raise
|
||||
raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)
|
||||
raise ExtractorError(clean_html(error_msg), expected=True)
|
||||
|
||||
formats = []
|
||||
|
||||
def get_video_info(*items, get_first=True, **kwargs):
|
||||
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
|
||||
|
||||
quality_info = api_data['media']['delivery']['movie']
|
||||
session_api_data = quality_info['session']
|
||||
for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
|
||||
fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
|
||||
if fmt:
|
||||
formats.append(fmt)
|
||||
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
|
||||
'needs_premium': ('isPremium', {bool}),
|
||||
'needs_subscription': ('isAdmission', {bool}),
|
||||
})) or {'needs_auth': True}))
|
||||
formats = [*self._yield_dmc_formats(api_data, video_id),
|
||||
*self._yield_dms_formats(api_data, video_id)]
|
||||
if not formats:
|
||||
fail_msg = clean_html(self._html_search_regex(
|
||||
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
|
||||
webpage, 'fail message', default=None, group='msg'))
|
||||
if fail_msg:
|
||||
self.to_screen(f'Niconico said: {fail_msg}')
|
||||
if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
|
||||
availability = None
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
elif availability == 'premium_only':
|
||||
self.raise_login_required('This video requires premium', metadata_available=True)
|
||||
elif availability == 'subscriber_only':
|
||||
self.raise_login_required('This video is for members only', metadata_available=True)
|
||||
elif availability == 'needs_auth':
|
||||
self.raise_login_required(metadata_available=False)
|
||||
|
||||
# Start extracting information
|
||||
tags = None
|
||||
|
@ -440,11 +519,15 @@ def get_video_info(*items, get_first=True, **kwargs):
|
|||
|
||||
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
|
||||
|
||||
def get_video_info(*items, get_first=True, **kwargs):
|
||||
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'_api_data': api_data,
|
||||
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
|
||||
'formats': formats,
|
||||
'availability': availability,
|
||||
'thumbnails': [{
|
||||
'id': key,
|
||||
'url': url,
|
||||
|
@ -472,8 +555,11 @@ def get_video_info(*items, get_first=True, **kwargs):
|
|||
|
||||
def _get_subtitles(self, video_id, api_data):
|
||||
comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
|
||||
if not comments_info.get('server'):
|
||||
return
|
||||
|
||||
danmaku = traverse_obj(self._download_json(
|
||||
f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
|
||||
f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
|
||||
'additionals': {},
|
||||
'params': comments_info.get('params'),
|
||||
'threadKey': comments_info.get('threadKey'),
|
||||
|
@ -489,10 +575,6 @@ def _get_subtitles(self, video_id, api_data):
|
|||
note='Downloading comments', errnote='Failed to download comments'),
|
||||
('data', 'threads', ..., 'comments', ...))
|
||||
|
||||
if not danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
|
|
|
@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
|
|||
'duration': 172,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
|
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
|
|||
}]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
|
|
@ -28,6 +28,29 @@ class RaiBaseIE(InfoExtractor):
|
|||
_GEO_COUNTRIES = ['IT']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _fix_m3u8_formats(self, media_url, video_id):
|
||||
fmts = self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
# Fix malformed m3u8 manifests by setting audio-only/video-only formats
|
||||
for f in fmts:
|
||||
if not f.get('acodec'):
|
||||
f['acodec'] = 'mp4a'
|
||||
if not f.get('vcodec'):
|
||||
f['vcodec'] = 'avc1'
|
||||
man_url = f['url']
|
||||
if re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url): # audio only
|
||||
f['vcodec'] = 'none'
|
||||
elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url): # video only
|
||||
f['acodec'] = 'none'
|
||||
else: # video+audio
|
||||
if f['acodec'] == 'none':
|
||||
f['acodec'] = 'mp4a'
|
||||
if f['vcodec'] == 'none':
|
||||
f['vcodec'] = 'avc1'
|
||||
|
||||
return fmts
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
|
||||
def fix_cdata(s):
|
||||
# remove \r\n\t before and after <![CDATA[ ]]> to avoid
|
||||
|
@ -69,8 +92,7 @@ def fix_cdata(s):
|
|||
'format_id': 'https-mp3',
|
||||
})
|
||||
elif ext == 'm3u8' or 'format=m3u8' in media_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._fix_m3u8_formats(media_url, video_id))
|
||||
elif ext == 'f4m':
|
||||
# very likely no longer needed. Cannot find any url that uses it.
|
||||
manifest_url = update_url_query(
|
||||
|
@ -153,10 +175,10 @@ def get_format_info(tbr):
|
|||
'format_id': f'https-{tbr}',
|
||||
'width': format_copy.get('width'),
|
||||
'height': format_copy.get('height'),
|
||||
'tbr': format_copy.get('tbr'),
|
||||
'vcodec': format_copy.get('vcodec'),
|
||||
'acodec': format_copy.get('acodec'),
|
||||
'fps': format_copy.get('fps'),
|
||||
'tbr': format_copy.get('tbr') or tbr,
|
||||
'vcodec': format_copy.get('vcodec') or 'avc1',
|
||||
'acodec': format_copy.get('acodec') or 'mp4a',
|
||||
'fps': format_copy.get('fps') or 25,
|
||||
} if format_copy else {
|
||||
'format_id': f'https-{tbr}',
|
||||
'width': _QUALITY[tbr][0],
|
||||
|
@ -245,7 +267,7 @@ class RaiPlayIE(RaiBaseIE):
|
|||
'series': 'Report',
|
||||
'season': '2013/14',
|
||||
'subtitles': {'it': 'count:4'},
|
||||
'release_year': 2022,
|
||||
'release_year': 2024,
|
||||
'episode': 'Espresso nel caffè - 07/04/2014',
|
||||
'timestamp': 1396919880,
|
||||
'upload_date': '20140408',
|
||||
|
@ -253,7 +275,7 @@ class RaiPlayIE(RaiBaseIE):
|
|||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
# 1080p direct mp4 url
|
||||
# 1080p
|
||||
'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
|
||||
'md5': 'aeda7243115380b2dd5e881fd42d949a',
|
||||
'info_dict': {
|
||||
|
@ -274,7 +296,7 @@ class RaiPlayIE(RaiBaseIE):
|
|||
'episode': 'Senza occhi',
|
||||
'timestamp': 1637318940,
|
||||
'upload_date': '20211119',
|
||||
'formats': 'count:12',
|
||||
'formats': 'count:7',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Video not available. Likely due to geo-restriction.']
|
||||
|
@ -527,7 +549,7 @@ class RaiPlaySoundPlaylistIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'ilruggitodelconiglio',
|
||||
'title': 'Il Ruggito del Coniglio',
|
||||
'description': 'md5:48cff6972435964284614d70474132e6',
|
||||
'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
|
||||
},
|
||||
'playlist_mincount': 65,
|
||||
}, {
|
||||
|
@ -634,19 +656,20 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
|
||||
class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
|
||||
class RaiNewsIE(RaiBaseIE):
|
||||
_VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
|
||||
_TESTS = [{
|
||||
# new rainews player (#3911)
|
||||
'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
|
||||
'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
|
||||
'info_dict': {
|
||||
'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf',
|
||||
'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
|
||||
'ext': 'mp4',
|
||||
'title': 'Puntata del 29/05/2022',
|
||||
'duration': 1589,
|
||||
'upload_date': '20220529',
|
||||
'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
|
||||
'duration': 196,
|
||||
'upload_date': '20240225',
|
||||
'uploader': 'rainews',
|
||||
'formats': 'count:2',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
|
@ -659,7 +682,8 @@ class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
|
|||
'description': 'I film in uscita questa settimana.',
|
||||
'thumbnail': r're:^https?://.*\.png$',
|
||||
'duration': 833,
|
||||
'upload_date': '20161103'
|
||||
'upload_date': '20161103',
|
||||
'formats': 'count:8',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['unable to extract player_data'],
|
||||
|
@ -684,7 +708,7 @@ def _real_extract(self, url):
|
|||
if not relinker_url:
|
||||
# fallback on old implementation for some old content
|
||||
try:
|
||||
return self._extract_from_content_id(video_id, url)
|
||||
return RaiIE._real_extract(self, url)
|
||||
except GeoRestrictedError:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
|
|
|
@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
|
|||
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
|
||||
'md5': '6f8fd68663e64936623e67c152a669e0',
|
||||
'info_dict': {
|
||||
'id': '10739193',
|
||||
'id': '10787730',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
|
||||
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
|
||||
'timestamp': 1684915200,
|
||||
'modified_timestamp': 1684922446,
|
||||
'modified_timestamp': 1687550432,
|
||||
'series': 'Vykopávky',
|
||||
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
|
||||
'channel_id': 'radio-wave',
|
||||
'upload_date': '20230524',
|
||||
'modified_date': '20230524',
|
||||
'modified_date': '20230623',
|
||||
},
|
||||
}, {
|
||||
# serial extraction
|
||||
|
@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
|
|||
'title': 'Nespavci',
|
||||
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
|
||||
},
|
||||
}, {
|
||||
# serialPart
|
||||
'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
|
||||
'info_dict': {
|
||||
'id': '8889035',
|
||||
'ext': 'm4a',
|
||||
'title': 'Gustavo Adolfo Bécquer: Hora duchů',
|
||||
'description': 'md5:343a15257b376c276e210b78e900ffea',
|
||||
'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
|
||||
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
|
||||
'timestamp': 1708173000,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'series': 'Povídka',
|
||||
'modified_date': '20240217',
|
||||
'upload_date': '20240217',
|
||||
'modified_timestamp': 1708173198,
|
||||
'channel_id': 'vltava',
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}]
|
||||
|
||||
def _call_api(self, path, item_id, msg='API JSON'):
|
||||
|
@ -322,7 +342,7 @@ def _real_extract(self, url):
|
|||
|
||||
entity = info['siteEntityBundle']
|
||||
|
||||
if entity == 'episode':
|
||||
if entity in ('episode', 'serialPart'):
|
||||
return self._extract_audio_entry(self._call_api(
|
||||
'episodes', info['contentId'], 'episode info API JSON'))
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj
|
||||
|
||||
|
||||
class SwearnetEpisodeIE(InfoExtractor):
|
||||
|
@ -51,7 +51,13 @@ def _real_extract(self, url):
|
|||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
try:
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
except ExtractorError:
|
||||
if 'Upgrade Now' in webpage:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
|
@ -15,7 +15,6 @@
|
|||
UserNotLive,
|
||||
determine_ext,
|
||||
format_field,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
|
@ -51,7 +50,13 @@ def _create_url(user_id, video_id):
|
|||
def _get_sigi_state(self, webpage, display_id):
|
||||
return self._search_json(
|
||||
r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
|
||||
'sigi state', display_id, end_pattern=r'</script>')
|
||||
'sigi state', display_id, end_pattern=r'</script>', default={})
|
||||
|
||||
def _get_universal_data(self, webpage, display_id):
    """Return the ``__DEFAULT_SCOPE__`` dict from the page's rehydration data.

    Searches *webpage* for the ``__UNIVERSAL_DATA_FOR_REHYDRATION__`` script
    element and picks its ``__DEFAULT_SCOPE__`` entry. An empty dict is
    returned when the element is absent or the entry is not a dict.

    @param webpage     HTML of the page to search
    @param display_id  ID forwarded to ``_search_json()``
    """
    return traverse_obj(self._search_json(
        r'<script[^>]+\bid="__UNIVERSAL_DATA_FOR_REHYDRATION__"[^>]*>', webpage,
        'universal data', display_id, end_pattern=r'</script>', default={}),
        ('__DEFAULT_SCOPE__', {dict})) or {}
|
||||
|
||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
|
@ -219,8 +224,8 @@ def audio_meta(url):
|
|||
def extract_addr(addr, add_meta={}):
|
||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||
if res:
|
||||
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
|
||||
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
|
||||
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
||||
parsed_meta.update(known_resolutions.get(res, {}))
|
||||
add_meta.setdefault('height', int_or_none(res[:-1]))
|
||||
return [{
|
||||
|
@ -237,22 +242,26 @@ def extract_addr(addr, add_meta={}):
|
|||
|
||||
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
||||
formats = []
|
||||
width = int_or_none(video_info.get('width'))
|
||||
height = int_or_none(video_info.get('height'))
|
||||
if video_info.get('play_addr'):
|
||||
formats.extend(extract_addr(video_info['play_addr'], {
|
||||
'format_id': 'play_addr',
|
||||
'format_note': 'Direct video',
|
||||
'vcodec': 'h265' if traverse_obj(
|
||||
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'width': width,
|
||||
'height': height,
|
||||
}))
|
||||
if video_info.get('download_addr'):
|
||||
formats.extend(extract_addr(video_info['download_addr'], {
|
||||
download_addr = video_info['download_addr']
|
||||
dl_width = int_or_none(download_addr.get('width'))
|
||||
formats.extend(extract_addr(download_addr, {
|
||||
'format_id': 'download_addr',
|
||||
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
||||
'vcodec': 'h264',
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'width': dl_width or width,
|
||||
'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
|
||||
'preference': -2 if video_info.get('has_watermark') else -1,
|
||||
}))
|
||||
if video_info.get('play_addr_h264'):
|
||||
|
@ -315,9 +324,6 @@ def extract_addr(addr, add_meta={}):
|
|||
|
||||
return {
|
||||
'id': aweme_id,
|
||||
'extractor_key': TikTokIE.ie_key(),
|
||||
'extractor': TikTokIE.IE_NAME,
|
||||
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'description': ('desc', {str}),
|
||||
|
@ -609,11 +615,12 @@ class TikTokIE(TikTokBaseIE):
|
|||
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||
'creator': 'MoxyPatch',
|
||||
'creators': ['MoxyPatch'],
|
||||
'uploader': 'moxypatch',
|
||||
'uploader_id': '7039142049363379205',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||
'artist': 'your worst nightmare',
|
||||
'artists': ['your worst nightmare'],
|
||||
'track': 'original sound',
|
||||
'upload_date': '20230303',
|
||||
'timestamp': 1677866781,
|
||||
|
@ -651,7 +658,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'comment_count': int,
|
||||
'thumbnail': r're:^https://.+\.webp',
|
||||
},
|
||||
'params': {'format': 'bytevc1_1080p_808907-0'},
|
||||
'skip': 'Unavailable via feed API, no formats available via web',
|
||||
}, {
|
||||
# Slideshow, audio-only m4a format
|
||||
'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
|
||||
|
@ -688,24 +695,35 @@ def _real_extract(self, url):
|
|||
try:
|
||||
return self._extract_aweme_app(video_id)
|
||||
except ExtractorError as e:
|
||||
e.expected = True
|
||||
self.report_warning(f'{e}; trying with webpage')
|
||||
|
||||
url = self._create_url(user_id, video_id)
|
||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
|
||||
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
|
||||
if next_data:
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
|
||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
|
||||
else:
|
||||
sigi_data = self._get_sigi_state(webpage, video_id)
|
||||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
|
||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
|
||||
|
||||
if status == 0:
|
||||
if universal_data := self._get_universal_data(webpage, video_id):
|
||||
self.write_debug('Found universal data for rehydration')
|
||||
status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
|
||||
|
||||
elif sigi_data := self._get_sigi_state(webpage, video_id):
|
||||
self.write_debug('Found sigi state data')
|
||||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
|
||||
self.write_debug('Found next.js data')
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||
|
||||
else:
|
||||
raise ExtractorError('Unable to extract webpage video data')
|
||||
|
||||
if video_data and status == 0:
|
||||
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||
elif status == 10216:
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
raise ExtractorError('Video not available', video_id=video_id)
|
||||
raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)
|
||||
|
||||
|
||||
class TikTokUserIE(TikTokBaseIE):
|
||||
|
@ -921,20 +939,23 @@ class DouyinIE(TikTokBaseIE):
|
|||
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.douyin.com/video/6961737553342991651',
|
||||
'md5': 'a97db7e3e67eb57bf40735c022ffa228',
|
||||
'md5': '9ecce7bc5b302601018ecb2871c63a75',
|
||||
'info_dict': {
|
||||
'id': '6961737553342991651',
|
||||
'ext': 'mp4',
|
||||
'title': '#杨超越 小小水手带你去远航❤️',
|
||||
'description': '#杨超越 小小水手带你去远航❤️',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 19782,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 19,
|
||||
'timestamp': 1620905839,
|
||||
'upload_date': '20210513',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
|
@ -943,20 +964,23 @@ class DouyinIE(TikTokBaseIE):
|
|||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||
'md5': '34a87ebff3833357733da3fe17e37c0e',
|
||||
'md5': '15c5e660b7048af3707304e3cc02bbb5',
|
||||
'info_dict': {
|
||||
'id': '6982497745948921092',
|
||||
'ext': 'mp4',
|
||||
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'uploader': '0731chaoyue',
|
||||
'uploader_id': '408654318141572',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'creator': '杨超越工作室',
|
||||
'duration': 42479,
|
||||
'creators': ['杨超越工作室'],
|
||||
'duration': 42,
|
||||
'timestamp': 1625739481,
|
||||
'upload_date': '20210708',
|
||||
'track': '@杨超越工作室创作的原声',
|
||||
'artists': ['杨超越工作室'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
|
@ -965,20 +989,23 @@ class DouyinIE(TikTokBaseIE):
|
|||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||
'md5': 'dde3302460f19db59c47060ff013b902',
|
||||
'md5': '0e6443758b8355db9a3c34864a4276be',
|
||||
'info_dict': {
|
||||
'id': '6953975910773099811',
|
||||
'ext': 'mp4',
|
||||
'title': '#一起看海 出现在你的夏日里',
|
||||
'description': '#一起看海 出现在你的夏日里',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 17343,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 17,
|
||||
'timestamp': 1619098692,
|
||||
'upload_date': '20210422',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
|
@ -1004,20 +1031,23 @@ class DouyinIE(TikTokBaseIE):
|
|||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6963263655114722595',
|
||||
'md5': 'cf9f11f0ec45d131445ec2f06766e122',
|
||||
'md5': '1440bcf59d8700f8e014da073a4dfea8',
|
||||
'info_dict': {
|
||||
'id': '6963263655114722595',
|
||||
'ext': 'mp4',
|
||||
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 15115,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 15,
|
||||
'timestamp': 1621261163,
|
||||
'upload_date': '20210517',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
|
@ -1025,34 +1055,23 @@ class DouyinIE(TikTokBaseIE):
|
|||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}]
|
||||
_APP_VERSIONS = [('23.3.0', '230300')]
|
||||
_APP_NAME = 'aweme'
|
||||
_AID = 1128
|
||||
_API_HOSTNAME = 'aweme.snssdk.com'
|
||||
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
|
||||
_WEBPAGE_HOST = 'https://www.douyin.com/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
return self._extract_aweme_app(video_id)
|
||||
except ExtractorError as e:
|
||||
e.expected = True
|
||||
self.to_screen(f'{e}; trying with webpage')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
render_data = self._search_json(
|
||||
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
|
||||
contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
|
||||
if not render_data:
|
||||
detail = traverse_obj(self._download_json(
|
||||
'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
|
||||
'Downloading web detail JSON', 'Failed to download web detail JSON',
|
||||
query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
|
||||
if not detail:
|
||||
# TODO: Run verification challenge code to generate signature cookies
|
||||
cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
|
||||
raise ExtractorError(
|
||||
'Fresh cookies (not necessarily logged in) are needed', expected=expected)
|
||||
'Fresh cookies (not necessarily logged in) are needed',
|
||||
expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
|
||||
|
||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
|
||||
return self._parse_aweme_video_app(detail)
|
||||
|
||||
|
||||
class TikTokVMIE(InfoExtractor):
|
||||
|
@ -1181,7 +1200,7 @@ def _real_extract(self, url):
|
|||
url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)
|
||||
|
||||
if webpage:
|
||||
data = try_call(lambda: self._get_sigi_state(webpage, uploader or room_id))
|
||||
data = self._get_sigi_state(webpage, uploader or room_id)
|
||||
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
|
||||
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
||||
or room_id)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
parse_qs,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
|
@ -48,17 +49,15 @@ def _unsmuggle_headers(self, url):
|
|||
return url, data, headers
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
webpage = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
'token': viewer['xsrft'],
|
||||
}
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||
try:
|
||||
self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
|
@ -123,7 +122,13 @@ def _parse_config(self, config, video_id):
|
|||
video_data = config['video']
|
||||
video_title = video_data.get('title')
|
||||
live_event = video_data.get('live_event') or {}
|
||||
is_live = live_event.get('status') == 'started'
|
||||
live_status = {
|
||||
'pending': 'is_upcoming',
|
||||
'active': 'is_upcoming',
|
||||
'started': 'is_live',
|
||||
'ended': 'post_live',
|
||||
}.get(live_event.get('status'))
|
||||
is_live = live_status == 'is_live'
|
||||
request = config.get('request') or {}
|
||||
|
||||
formats = []
|
||||
|
@ -232,7 +237,8 @@ def _parse_config(self, config, video_id):
|
|||
'chapters': chapters or None,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
|
||||
# Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
||||
# at the same time without actual units specified.
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
|
|
|
@ -114,9 +114,9 @@
|
|||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID',
|
||||
'clientVersion': '17.31.35',
|
||||
'clientVersion': '18.11.34',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
|
||||
'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
|
||||
}
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
|
||||
|
@ -127,9 +127,9 @@
|
|||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_EMBEDDED_PLAYER',
|
||||
'clientVersion': '17.31.35',
|
||||
'clientVersion': '18.11.34',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
|
||||
'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
|
||||
|
@ -168,9 +168,9 @@
|
|||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS',
|
||||
'clientVersion': '17.33.2',
|
||||
'clientVersion': '18.11.34',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
}
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||
|
@ -180,9 +180,9 @@
|
|||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_MESSAGES_EXTENSION',
|
||||
'clientVersion': '17.33.2',
|
||||
'clientVersion': '18.11.34',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
|
||||
|
@ -3640,15 +3640,28 @@ def _get_requested_clients(self, url, smuggled_data):
|
|||
|
||||
return orderedSet(requested_clients)
|
||||
|
||||
def _invalid_player_response(self, pr, video_id):
    """Report the video ID of a player response that belongs to another video.

    YouTube may return a different video player response than expected.
    See: https://github.com/TeamNewPipe/NewPipe/issues/8713

    Returns whatever is found at `videoDetails.videoId` in `pr` when it
    differs from `video_id`; returns None when the two are equal. When the
    field cannot be found the looked-up value itself (not `video_id`) is
    returned, so callers should test the result for truthiness.
    """
    returned_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    if returned_id == video_id:
        return None
    return returned_id
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
initial_pr = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||
|
||||
prs = []
|
||||
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
prs.append({**initial_pr, 'streamingData': None})
|
||||
|
||||
all_clients = set(clients)
|
||||
clients = clients[::-1]
|
||||
prs = []
|
||||
|
||||
def append_client(*client_names):
|
||||
""" Append the first client name that exists but not already used """
|
||||
|
@ -3660,18 +3673,9 @@ def append_client(*client_names):
|
|||
all_clients.add(actual_client)
|
||||
return
|
||||
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
if initial_pr:
|
||||
pr = dict(initial_pr)
|
||||
pr['streamingData'] = None
|
||||
prs.append(pr)
|
||||
|
||||
last_error = None
|
||||
tried_iframe_fallback = False
|
||||
player_url = None
|
||||
skipped_clients = {}
|
||||
while clients:
|
||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
|
@ -3692,26 +3696,19 @@ def append_client(*client_names):
|
|||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
self.report_warning(last_error)
|
||||
last_error = e
|
||||
self.report_warning(e)
|
||||
continue
|
||||
|
||||
if pr:
|
||||
# YouTube may return a different video player response than expected.
|
||||
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
||||
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
|
||||
if pr_video_id and pr_video_id != video_id:
|
||||
self.report_warning(
|
||||
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
|
||||
else:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
if pr_id := self._invalid_player_response(pr, video_id):
|
||||
skipped_clients[client] = pr_id
|
||||
elif pr:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
|
||||
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
|
||||
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
|
||||
|
@ -3722,10 +3719,15 @@ def append_client(*client_names):
|
|||
elif not variant:
|
||||
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
||||
|
||||
if last_error:
|
||||
if not len(prs):
|
||||
raise last_error
|
||||
self.report_warning(last_error)
|
||||
if skipped_clients:
|
||||
self.report_warning(
|
||||
f'Skipping player responses from {"/".join(skipped_clients)} clients '
|
||||
f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
|
||||
if not prs:
|
||||
raise ExtractorError(
|
||||
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
|
||||
elif not prs:
|
||||
raise ExtractorError('Failed to extract any player response')
|
||||
return prs, player_url
|
||||
|
||||
def _needs_live_processing(self, live_status, duration):
|
||||
|
|
118
yt_dlp/extractor/zenporn.py
Normal file
118
yt_dlp/extractor/zenporn.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
import base64
|
||||
import binascii
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ZenPornIE(InfoExtractor):
    """Extractor for zenporn.com video pages.

    The watch page embeds a player hosted on an external domain. Metadata and
    the (obfuscated) media URL are then fetched from that domain's JSON API.
    """

    _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
        'md5': '07bd576b5920714d74975c054ca28dee',
        'info_dict': {
            'id': '9563799',
            'display_id': '15627016',
            'ext': 'mp4',
            'title': 'md5:669eafd3bbc688aa29770553b738ada2',
            'description': '',
            'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
            'upload_date': '20230925',
            'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
            'age_limit': 18,
        }
    }, {
        'url': 'https://zenporn.com/video/15570701',
        'md5': 'acba0d080d692664fcc8c4e5502b1a67',
        'info_dict': {
            'id': '2297875',
            'display_id': '15570701',
            'ext': 'mp4',
            'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
            'description': '',
            'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
            'upload_date': '20230921',
            'uploader': 'Lois Clarke',
            'age_limit': 18,
        }
    }, {
        'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
        'md5': '67411256aa9451449e4d29f3be525541',
        'info_dict': {
            'id': '12791908',
            'display_id': '8531117',
            'ext': 'mp4',
            'title': 'Amateur students having a fuck fest at club',
            'description': '',
            'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
            'upload_date': '20191005',
            'uploader': 'Jackopenass',
            'age_limit': 18,
        }
    }, {
        'url': 'https://zenporn.com/video/15872038/glad-you-came/',
        'md5': '296ccab437f5bac6099433768449d8e1',
        'info_dict': {
            'id': '111585',
            'display_id': '15872038',
            'ext': 'mp4',
            'title': 'Glad You Came',
            'description': '',
            'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
            'upload_date': '20231024',
            'uploader': 'Martin Rudenko',
            'age_limit': 18,
        }
    }]

    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
        """Build the metadata JSON URL for a video on the embed host.

        This function is a reverse engineering from the website javascript.
        The path consists of the numeric ID rounded down to a multiple of
        1,000,000, then of 1,000, then the ID itself, e.g. ID 2297875 gives
        `2000000/2297000/2297875`.
        """
        result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
        return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'

    @staticmethod
    def _decode_video_url(encoded_url):
        """Decode the obfuscated media path; return None if it is not decodable.

        This function is a reverse engineering from the website javascript.
        """
        # Replace lookalike characters and standardize map: the site swaps some
        # letters for visually identical ones and uses `.`, `,`, `~` in place of
        # the base64 characters `+`, `/`, `=`.
        translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
        try:
            return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
        except (binascii.Error, ValueError):
            # binascii.Error: not valid base64; ValueError also covers a
            # payload that is not valid UTF-8 (UnicodeDecodeError)
            return None

    def _real_extract(self, url):
        """Extract a single video from a zenporn.com watch page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The real video ID and the API host only appear in the embed URL
        ext_domain, video_id = self._search_regex(
            r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
            webpage, 'embed info', group=('ext_domain', 'extr_id'))

        # Metadata is optional: a failed download only costs the extra fields
        info_json = self._download_json(
            self._gen_info_url(ext_domain, video_id), video_id, fatal=False)

        video_json = self._download_json(
            f'https://{ext_domain}/api/videofile.php', video_id, query={
                'video_id': video_id,
                'lifetime': 8640000,
            }, note='Downloading video file JSON', errnote='Failed to download video file JSON')

        # Look the field up defensively so that an empty or unexpected API
        # reply is reported as an extraction error instead of surfacing as a
        # raw IndexError/KeyError/TypeError from direct indexing
        encoded_url = traverse_obj(video_json, (0, 'video_url', {str}))
        if not encoded_url:
            raise ExtractorError('Unable to extract the encoded video url')

        decoded_url = self._decode_video_url(encoded_url)
        if not decoded_url:
            raise ExtractorError('Unable to decode the video url')

        return {
            'id': video_id,
            'display_id': display_id,
            'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
            'url': f'https://{ext_domain}{decoded_url}',
            'age_limit': 18,
            **traverse_obj(info_json, ('video', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('thumb', {url_or_none}),
                'upload_date': ('post_date', {unified_strdate}),
                'uploader': ('user', 'username', {str}),
            })),
        }
|
|
@ -68,6 +68,7 @@ def __init__(self, logger, verbose=False):
|
|||
def close(self):
    """Close every registered handler, then drop all of them from the registry."""
    registry = self.handlers
    for rh_key in registry:
        registry[rh_key].close()
    # Start over with a fresh, empty mapping once everything has been closed
    self.handlers = {}
|
||||
|
||||
def add_handler(self, handler: RequestHandler):
|
||||
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
|
||||
|
|
Loading…
Reference in a new issue