mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-22 08:46:43 +00:00
[vshare] Improve extraction, fix formats sorting and carry long lines
This commit is contained in:
parent
ff31f2d5c3
commit
a2b6aba8de
|
@@ -26,13 +26,20 @@ class VShareIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return re.findall(
|
||||||
|
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
|
||||||
|
webpage)
|
||||||
|
|
||||||
def _extract_packed(self, webpage):
|
def _extract_packed(self, webpage):
|
||||||
packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code')
|
packed = self._search_regex(
|
||||||
|
r'(eval\(function.+)', webpage, 'packed code')
|
||||||
unpacked = decode_packed_codes(packed)
|
unpacked = decode_packed_codes(packed)
|
||||||
digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
|
digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
|
||||||
digits = digits.split(',')
|
digits = [int(digit) for digit in digits.split(',')]
|
||||||
digits = [int(digit) for digit in digits]
|
key_digit = self._search_regex(
|
||||||
key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
|
r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
|
||||||
chars = [compat_chr(d - int(key_digit)) for d in digits]
|
chars = [compat_chr(d - int(key_digit)) for d in digits]
|
||||||
return ''.join(chars)
|
return ''.join(chars)
|
||||||
|
|
||||||
|
@@ -40,9 +47,11 @@ def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)
|
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
title = self._html_search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
title = title.split(' - ')[0]
|
title = title.split(' - ')[0]
|
||||||
|
|
||||||
error = self._html_search_regex(
|
error = self._html_search_regex(
|
||||||
|
@@ -51,17 +60,15 @@ def _real_extract(self, url):
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError(error, expected=True)
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
unpacked = self._extract_packed(webpage)
|
info = self._parse_html5_media_entries(
|
||||||
video_urls = re.findall(r'<source src="([^"]+)', unpacked)
|
url, '<video>%s</video>' % self._extract_packed(webpage),
|
||||||
formats = [{'url': video_url} for video_url in video_urls]
|
video_id)[0]
|
||||||
return {
|
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
|
|
||||||
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
})
|
||||||
}
|
|
||||||
|
|
||||||
@staticmethod
|
return info
|
||||||
def _extract_urls(webpage):
|
|
||||||
return re.findall(
|
|
||||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
|
|
||||||
webpage)
|
|
||||||
|
|
Loading…
Reference in a new issue