Merge pull request #6966 from remitamine/kuwo

[kuwo] fix title extraction and update test
This commit is contained in:
Yen Chi Hsuan 2015-09-26 19:28:16 +08:00
commit fe6ad195ae

View file

@@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE):
'upload_date': '20080122', 'upload_date': '20080122',
'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
}, },
'skip': 'this song has been offline because of copyright issues',
}, { }, {
'url': 'http://www.kuwo.cn/yinyue/6446136/', 'url': 'http://www.kuwo.cn/yinyue/6446136/',
'info_dict': { 'info_dict': {
@@ -76,9 +77,11 @@ def _real_extract(self, url):
webpage = self._download_webpage( webpage = self._download_webpage(
url, song_id, note='Download song detail info', url, song_id, note='Download song detail info',
errnote='Unable to get song detail info') errnote='Unable to get song detail info')
if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
song_name = self._html_search_regex( song_name = self._html_search_regex(
r'<h1[^>]+title="([^"]+)">', webpage, 'song name') r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
singer_name = self._html_search_regex( singer_name = self._html_search_regex(
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"', r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
webpage, 'singer name', fatal=False) webpage, 'singer name', fatal=False)