[youtube] Filter duplicates in playlists base extractor

This commit is contained in:
Sergey M․ 2016-01-31 17:52:02 +06:00
parent 8e7aad2075
commit 9acd33094d

View file

@@ -233,7 +233,7 @@ def extract_videos_from_page(self, page):
 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
     def _process_page(self, content):
-        for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
+        for playlist_id in set(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
             yield self.url_result(
                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')