From 8a38a194fb08a253986cdbafa02cf699ef76c9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 20:55:48 +0100 Subject: [PATCH] Add auxiliary methods to InfoExtractor to set the '_type' key and use them for some playlist IEs --- test/test_youtube_lists.py | 35 +++++++++++++++++++++-------------- youtube_dl/InfoExtractors.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 055bf69c8..9e91484f8 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -36,31 +36,37 @@ def extract_info(self, url): return url class TestYoutubeLists(unittest.TestCase): + def assertIsPlaylist(self,info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + def test_youtube_playlist(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] + result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] + self.assertIsPlaylist(result) + ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) def test_issue_673(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('PLBB231211A4F62143') - self.assertTrue(len(dl.result) > 40) + result = ie.extract('PLBB231211A4F62143')[0] + self.assertTrue(len(result['entries']) > 40) def test_youtube_playlist_long(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - self.assertTrue(len(dl.result) >= 799) + result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] + self.assertIsPlaylist(result) + self.assertTrue(len(result['entries']) >= 799) def test_youtube_playlist_with_deleted(self): #651 dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] + result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] + ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results) @@ -68,10 +74,11 @@ def test_youtube_course(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course - ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - self.assertEqual(YoutubeIE()._extract_id(dl.result[0]), 'j9WZyLZCBzs') - self.assertEqual(len(dl.result), 25) - self.assertEqual(YoutubeIE()._extract_id(dl.result[-1]), 'rYefUsYuEp0') + result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] + entries = result['entries'] + self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(len(entries), 25) + self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_channel(self): # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long @@ -80,8 +87,8 @@ def test_youtube_channel(self): def test_youtube_user(self): dl = FakeDownloader() ie = YoutubeUserIE(dl) - ie.extract('https://www.youtube.com/user/TheLinuxFoundation') - self.assertTrue(len(dl.result) >= 320) + result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] + self.assertTrue(len(result['entries']) >= 320) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d79f6068f..895658f49 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -128,6 +128,24 @@ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): urlh = self._request_webpage(url_or_request, video_id, note, errnote) webpage_bytes = urlh.read() return webpage_bytes.decode('utf-8', 'replace') + + #Methods for following #608 + #They set the correct value of the '_type' key + def video_result(self, video_info): + """Returns a video""" + video_info['_type'] = 'video' + return video_info + def url_result(self, url, ie=None): + """Returns a url that points to a page that should be processed""" + #TODO: ie should be the class used for getting the info + video_info = {'_type': 'url', + 'url': url} + return video_info + def playlist_result(self, entries): + """Returns a playlist""" + video_info = {'_type': 'playlist', + 'entries': entries} + return video_info class YoutubeIE(InfoExtractor): @@ -1756,7 +1774,8 @@ def _real_extract(self, url): else: self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) - return self._downloader.extract_info_iterable(videos) + url_results = [self.url_result(url) for url in videos] + return [self.playlist_result(url_results)] class YoutubeChannelIE(InfoExtractor): @@ -1807,7 +1826,8 @@ def _real_extract(self, url): self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_entries = [self.url_result(url) for url in urls] + return [self.playlist_result(url_entries)] class YoutubeUserIE(InfoExtractor): @@ -1890,7 +1910,8 @@ def _real_extract(self, url): (username, all_ids_count, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_results = [self.url_result(url) for url in urls] + return [self.playlist_result(url_results)] class BlipTVUserIE(InfoExtractor): @@ -1981,7 +2002,8 @@ def _real_extract(self, url): (self.IE_NAME, username, all_ids_count, len(video_ids))) urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_entries = [self.url_result(url) for url in urls] + return [self.playlist_result(url_entries)] class DepositFilesIE(InfoExtractor):