Add base info extractor class

This commit is contained in:
Lucas 2019-08-18 23:39:06 +02:00
parent 0b3cb7df0d
commit 480f2d89f6

View file

@ -10,7 +10,31 @@
orderedSet, orderedSet,
) )
class DeezerPlaylistIE(InfoExtractor): class DeezerBaseInfoExtractor(InfoExtractor):
def get_data(self, url):
    """Download a Deezer page and decode the JSON state embedded in it.

    Steps, in order:
      1. Emit the "previews only" warning unless the downloader params
         contain a 'test' key.
      2. Take the 'id' group obtained by matching ``self._VALID_URL``
         against ``url``.
      3. Download the page.
      4. Abort if the page contains a ``<p class="soon-txt">`` paragraph,
         which this extractor labels the geoblocking message.
      5. Locate the embedded JSON text and decode it.

    Returns:
        A ``(content_id, webpage, data)`` tuple, where ``data`` is the
        decoded JSON object.

    Raises:
        ExtractorError: (with ``expected=True``) when the geoblocking
            paragraph is found; its text is included in the message.
    """
    downloader = self._downloader
    if 'test' not in downloader.params:
        downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')

    # No None check on the match result, same as before: a URL that does
    # not match _VALID_URL fails here with AttributeError.
    content_id = re.match(self._VALID_URL, url).group('id')
    page = self._download_webpage(url, content_id)

    blocked_notice = self._html_search_regex(
        r'<p class="soon-txt">(.*?)</p>', page, 'geoblocking message',
        default=None)
    if blocked_notice is not None:
        raise ExtractorError(
            'Deezer said: %s' % blocked_notice, expected=True)

    # Two page layouts carry the payload: a __DZR_APP_STATE__ assignment
    # inside a <script> tag, or a naboo.display(...) call.
    # NOTE(review): presumably _search_regex tries these in order - confirm.
    state_patterns = (
        r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
        r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n',
    )
    raw_state = self._search_regex(state_patterns, page, 'data JSON')
    return content_id, page, json.loads(raw_state)
class DeezerPlaylistIE(DeezerBaseInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.deezer.com/playlist/176747451', 'url': 'http://www.deezer.com/playlist/176747451',
@ -24,25 +48,7 @@ class DeezerPlaylistIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
if 'test' not in self._downloader.params: playlist_id, webpage, data = self.get_data(url)
self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
geoblocking_msg = self._html_search_regex(
r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
default=None)
if geoblocking_msg is not None:
raise ExtractorError(
'Deezer said: %s' % geoblocking_msg, expected=True)
data_json = self._search_regex(
(r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
webpage, 'data JSON')
data = json.loads(data_json)
playlist_title = data.get('DATA').get('TITLE') playlist_title = data.get('DATA').get('TITLE')
playlist_uploader = data.get('DATA').get('PARENT_USERNAME') playlist_uploader = data.get('DATA').get('PARENT_USERNAME')