[extractor/common] Improve thumbnail extraction for HTML5 entries

This commit is contained in:
Sergey M․ 2018-03-19 23:43:53 +07:00
parent 38f59e2793
commit 6780154e6b
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -2150,8 +2150,8 @@ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
return formats return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
def absolute_url(video_url): def absolute_url(item_url):
return compat_urlparse.urljoin(base_url, video_url) return urljoin(base_url, item_url)
def parse_content_type(content_type): def parse_content_type(content_type):
if not content_type: if not content_type:
@@ -2208,7 +2208,7 @@ def _media_formats(src, cur_media_type, type_info={}):
if src: if src:
_, formats = _media_formats(src, media_type) _, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats) media_info['formats'].extend(formats)
media_info['thumbnail'] = media_attributes.get('poster') media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content: if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content): for source_tag in re.findall(r'<source[^>]+>', media_content):
source_attributes = extract_attributes(source_tag) source_attributes = extract_attributes(source_tag)