[Biqle] Fix extractor (#2731)

Closes #193
Authored by: Bricio
This commit is contained in:
Bricio 2022-02-18 13:02:14 -03:00 committed by GitHub
parent 0ad92dfb18
commit 5625e6073f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 51 additions and 42 deletions

View File

@@ -3,27 +3,28 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .vk import VKIE from .vk import VKIE
from ..compat import ( from ..compat import compat_b64decode
compat_b64decode, from ..utils import (
compat_urllib_parse_unquote, int_or_none,
js_to_json,
traverse_obj,
unified_timestamp,
) )
from ..utils import int_or_none
class BIQLEIE(InfoExtractor): class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)' _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{ _TESTS = [{
# Youtube embed 'url': 'https://biqle.ru/watch/-2000421746_85421746',
'url': 'https://biqle.ru/watch/-115995369_456239081', 'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'info_dict': { 'info_dict': {
'id': '8v4f-avW-VI', 'id': '-2000421746_85421746',
'ext': 'mp4', 'ext': 'mp4',
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer", 'title': 'Forsaken By Hope Studio Clip',
'description': 'Passe-Partout', 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
'uploader_id': 'mrsimpsonstef3', 'upload_date': '19700101',
'uploader': 'Phanolito', 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
'upload_date': '20120822', 'timestamp': 0,
}, },
}, { }, {
'url': 'http://biqle.org/watch/-44781847_168547604', 'url': 'http://biqle.org/watch/-44781847_168547604',
@@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):
'id': '-44781847_168547604', 'id': '-44781847_168547604',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки', 'title': 'Ребенок в шоке от автоматической мойки',
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454, 'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
'upload_date': '20140404', 'upload_date': '20140404',
'uploader_id': '47850140', 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>', title = self._html_search_meta('name', webpage, 'Title', fatal=False)
webpage, 'embed url')) timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
description = self._html_search_meta('description', webpage, 'Description', default=None)
global_embed_url = self._search_regex(
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
webpage, 'global Embed url')
hash = self._search_regex(
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
embed_url = global_embed_url + hash
if VKIE.suitable(embed_url): if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id) return self.url_result(embed_url, VKIE.ie_key(), video_id)
embed_page = self._download_webpage( embed_page = self._download_webpage(
embed_url, video_id, headers={'Referer': url}) embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
video_ext = self._get_cookies(embed_url).get('video_ext')
if video_ext: glob_params = self._parse_json(self._search_regex(
video_ext = compat_urllib_parse_unquote(video_ext.value) r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
if not video_ext: embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
video_ext = compat_b64decode(self._search_regex( host_name = compat_b64decode(glob_params['server'][::-1]).decode()
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
embed_page, 'video_ext')).decode()
video_id, sig, _, access_token = video_ext.split(':')
item = self._download_json( item = self._download_json(
'https://api.vk.com/method/video.get', video_id, f'https://{host_name}/method/video.get/{video_id}', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={ headers={'Referer': url}, query={
'access_token': access_token, 'token': glob_params['video']['access_token'],
'sig': sig,
'v': 5.44,
'videos': video_id, 'videos': video_id,
'ckey': glob_params['c_key'],
'credentials': glob_params['video']['credentials'],
})['response']['items'][0] })['response']['items'][0]
title = item['title']
formats = [] formats = []
for f_id, f_url in item.get('files', {}).items(): for f_id, f_url in item.get('files', {}).items():
if f_id == 'external': if f_id == 'external':
return self.url_result(f_url) return self.url_result(f_url)
ext, height = f_id.split('_') ext, height = f_id.split('_')
formats.append({ height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
'format_id': height + 'p', if height_extra_key:
'url': f_url, formats.append({
'height': int_or_none(height), 'format_id': f'{height}p',
'ext': ext, 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
}) 'height': int_or_none(height),
'ext': ext,
})
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []
@@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'comment_count': int_or_none(item.get('comments')), 'comment_count': int_or_none(item.get('comments')),
'description': item.get('description'), 'description': description,
'duration': int_or_none(item.get('duration')), 'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('date')), 'timestamp': timestamp,
'uploader': item.get('owner_id'),
'view_count': int_or_none(item.get('views')), 'view_count': int_or_none(item.get('views')),
} }