[niconico] Fix extraction of thumbnails and uploader (#3266)

This commit is contained in:
Lesmiscore (Naoya Ozaki) 2022-04-01 19:31:58 +09:00 committed by GitHub
parent e6f868a63c
commit 5d45484cc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 6 deletions

View file

@ -25,7 +25,10 @@
parse_duration, parse_duration,
parse_filesize, parse_filesize,
parse_iso8601, parse_iso8601,
parse_resolution,
qualities,
remove_start, remove_start,
str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
@ -430,18 +433,25 @@ def get_video_info(*items, get_first=True, **kwargs):
# find in json (logged in) # find in json (logged in)
tags = traverse_obj(api_data, ('tag', 'items', ..., 'name')) tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
return { return {
'id': video_id, 'id': video_id,
'_api_data': api_data, '_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None), 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats, 'formats': formats,
'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta( 'thumbnails': [{
('image', 'og:image'), webpage, 'thumbnail', default=None), 'id': key,
'url': url,
'ext': 'jpg',
'preference': thumb_prefs(key),
**parse_resolution(url, lenient=True),
} for key, url in (get_video_info('thumbnail') or {}).items() if url],
'description': clean_html(get_video_info('description')), 'description': clean_html(get_video_info('description')),
'uploader': traverse_obj(api_data, ('owner', 'nickname')), 'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601( 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
self._html_search_meta('video:release_date', webpage, 'date published', default=None)), self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
'uploader_id': traverse_obj(api_data, ('owner', 'id')),
'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')), 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')), 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
'view_count': int_or_none(get_video_info('count', 'view')), 'view_count': int_or_none(get_video_info('count', 'view')),

View file

@ -2418,10 +2418,13 @@ def parse_count(s):
return str_to_int(mobj.group(1)) return str_to_int(mobj.group(1))
def parse_resolution(s): def parse_resolution(s, *, lenient=False):
if s is None: if s is None:
return {} return {}
if lenient:
mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
else:
mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s) mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
if mobj: if mobj:
return { return {