[youtube] Extract alt_title and creator for music videos (Closes #7862)

This commit is contained in:
Sergey M․ 2015-12-14 21:31:53 +06:00
parent 31b2051e21
commit 0cb58b0259

View file

@ -33,6 +33,7 @@
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
remove_quotes,
remove_start, remove_start,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
@ -395,12 +396,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20120506', 'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773', 'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'], 'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop', 'uploader': 'Icona Pop',
'uploader_id': 'IconaPop', 'uploader_id': 'IconaPop',
'creator': 'Icona Pop',
} }
}, },
{ {
@ -411,9 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20130703', 'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373', 'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO', 'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO',
'creator': 'Justin Timberlake',
'age_limit': 18, 'age_limit': 18,
} }
}, },
@ -492,10 +497,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM', 'id': 'nfWlot6h_JM',
'ext': 'm4a', 'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off', 'title': 'Taylor Swift - Shake It Off',
'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO', 'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818', 'upload_date': '20140818',
'creator': 'Taylor Swift',
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
@ -551,9 +558,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20100430', 'upload_date': '20100430',
'uploader_id': 'deadmau5', 'uploader_id': 'deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360', 'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5', 'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)', 'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
}, },
'expected_warnings': [ 'expected_warnings': [
'DASH manifest missing', 'DASH manifest missing',
@ -701,10 +710,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg', 'id': 'lsguqyKfVQg',
'ext': 'mp4', 'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119', 'upload_date': '20151119',
'uploader_id': 'IronSoulElf', 'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf', 'uploader': 'IronSoulElf',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1308,6 +1319,15 @@ def add_dash_mpd(video_info):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date) upload_date = unified_strdate(upload_date)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
video_creator = clean_html(m_music.group('creator'))
else:
video_alt_title = video_creator = None
m_cat_container = self._search_regex( m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None) video_webpage, 'categories', default=None)
@ -1537,7 +1557,9 @@ def _map_to_format_list(urlmap):
'uploader': video_uploader, 'uploader': video_uploader,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
'upload_date': upload_date, 'upload_date': upload_date,
'creator': video_creator,
'title': video_title, 'title': video_title,
'alt_title': video_alt_title,
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': video_description, 'description': video_description,
'categories': video_categories, 'categories': video_categories,