From ecfef3e5bf1bea8a9881b950b4239a0e1b09d10e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 7 Jan 2014 09:41:13 +0100 Subject: [PATCH] +unicode_literals --- test/test_playlists.py | 62 +++++++++++++++++++----------------- youtube_dl/extractor/imdb.py | 37 +++++++++++---------- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 3229823b69..b3bfbd9238 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # encoding: utf-8 +from __future__ import unicode_literals + # Allow direct execution import os import sys @@ -43,7 +45,7 @@ def test_dailymotion_playlist(self): ie = DailymotionPlaylistIE(dl) result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'SPORT') + self.assertEqual(result['title'], 'SPORT') self.assertTrue(len(result['entries']) > 20) def test_dailymotion_user(self): @@ -51,7 +53,7 @@ def test_dailymotion_user(self): ie = DailymotionUserIE(dl) result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Génération Quoi') + self.assertEqual(result['title'], 'Génération Quoi') self.assertTrue(len(result['entries']) >= 26) def test_vimeo_channel(self): @@ -59,7 +61,7 @@ def test_vimeo_channel(self): ie = VimeoChannelIE(dl) result = ie.extract('http://vimeo.com/channels/tributes') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Vimeo Tributes') + self.assertEqual(result['title'], 'Vimeo Tributes') self.assertTrue(len(result['entries']) > 24) def test_vimeo_user(self): @@ -67,7 +69,7 @@ def test_vimeo_user(self): ie = VimeoUserIE(dl) result = ie.extract('http://vimeo.com/nkistudio/videos') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Nki') + self.assertEqual(result['title'], 'Nki') 
self.assertTrue(len(result['entries']) > 65) def test_vimeo_album(self): @@ -75,7 +77,7 @@ def test_vimeo_album(self): ie = VimeoAlbumIE(dl) result = ie.extract('http://vimeo.com/album/2632481') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Staff Favorites: November 2013') + self.assertEqual(result['title'], 'Staff Favorites: November 2013') self.assertTrue(len(result['entries']) > 12) def test_vimeo_groups(self): @@ -83,7 +85,7 @@ def test_vimeo_groups(self): ie = VimeoGroupsIE(dl) result = ie.extract('http://vimeo.com/groups/rolexawards') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Rolex Awards for Enterprise') + self.assertEqual(result['title'], 'Rolex Awards for Enterprise') self.assertTrue(len(result['entries']) > 72) def test_ustream_channel(self): @@ -91,7 +93,7 @@ def test_ustream_channel(self): ie = UstreamChannelIE(dl) result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'5124905') + self.assertEqual(result['id'], '5124905') self.assertTrue(len(result['entries']) >= 11) def test_soundcloud_set(self): @@ -99,7 +101,7 @@ def test_soundcloud_set(self): ie = SoundcloudSetIE(dl) result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'The Royal Concept EP') + self.assertEqual(result['title'], 'The Royal Concept EP') self.assertTrue(len(result['entries']) >= 6) def test_soundcloud_user(self): @@ -107,7 +109,7 @@ def test_soundcloud_user(self): ie = SoundcloudUserIE(dl) result = ie.extract('https://soundcloud.com/the-concept-band') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'9615865') + self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) def test_livestream_event(self): @@ -115,7 +117,7 @@ def test_livestream_event(self): ie = LivestreamIE(dl) result = 
ie.extract('http://new.livestream.com/tedx/cityenglish') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'TEDCity2.0 (English)') + self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) def test_nhl_videocenter(self): @@ -123,8 +125,8 @@ def test_nhl_videocenter(self): ie = NHLVideocenterIE(dl) result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'999') - self.assertEqual(result['title'], u'Highlights') + self.assertEqual(result['id'], '999') + self.assertEqual(result['title'], 'Highlights') self.assertEqual(len(result['entries']), 12) def test_bambuser_channel(self): @@ -132,7 +134,7 @@ def test_bambuser_channel(self): ie = BambuserChannelIE(dl) result = ie.extract('http://bambuser.com/channel/pixelversity') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'pixelversity') + self.assertEqual(result['title'], 'pixelversity') self.assertTrue(len(result['entries']) >= 60) def test_bandcamp_album(self): @@ -140,7 +142,7 @@ def test_bandcamp_album(self): ie = BandcampAlbumIE(dl) result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') self.assertIsPlaylist(result) - self.assertEqual(result['title'], u'Nightmare Night EP') + self.assertEqual(result['title'], 'Nightmare Night EP') self.assertTrue(len(result['entries']) >= 4) def test_smotri_community(self): @@ -148,8 +150,8 @@ def test_smotri_community(self): ie = SmotriCommunityIE(dl) result = ie.extract('http://smotri.com/community/video/kommuna') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'kommuna') - self.assertEqual(result['title'], u'КПРФ') + self.assertEqual(result['id'], 'kommuna') + self.assertEqual(result['title'], 'КПРФ') self.assertTrue(len(result['entries']) >= 4) def test_smotri_user(self): @@ -157,17 +159,17 @@ def test_smotri_user(self): ie = SmotriUserIE(dl) result = 
ie.extract('http://smotri.com/user/inspector') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'inspector') - self.assertEqual(result['title'], u'Inspector') + self.assertEqual(result['id'], 'inspector') + self.assertEqual(result['title'], 'Inspector') self.assertTrue(len(result['entries']) >= 9) def test_AcademicEarthCourse(self): dl = FakeYDL() ie = AcademicEarthCourseIE(dl) - result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/') + result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'building-dynamic-websites') - self.assertEqual(result['title'], u'Building Dynamic Websites') + self.assertEqual(result['id'], 'building-dynamic-websites') + self.assertEqual(result['title'], 'Building Dynamic Websites') self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. 
The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") self.assertEqual(len(result['entries']), 10) @@ -176,8 +178,8 @@ def test_ivi_compilation(self): ie = IviCompilationIE(dl) result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'dezhurnyi_angel') - self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)') + self.assertEqual(result['id'], 'dezhurnyi_angel') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') self.assertTrue(len(result['entries']) >= 36) def test_ivi_compilation_season(self): @@ -185,8 +187,8 @@ def test_ivi_compilation_season(self): ie = IviCompilationIE(dl) result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'dezhurnyi_angel/season2') - self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') + self.assertEqual(result['id'], 'dezhurnyi_angel/season2') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') self.assertTrue(len(result['entries']) >= 20) def test_imdb_list(self): @@ -194,8 +196,8 @@ def test_imdb_list(self): ie = ImdbListIE(dl) result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') self.assertIsPlaylist(result) - self.assertEqual(result['id'], u'sMjedvGDd8U') - self.assertEqual(result['title'], u'Animated and Family Films') + self.assertEqual(result['id'], 'sMjedvGDd8U') + self.assertEqual(result['title'], 'Animated and Family Films') self.assertTrue(len(result['entries']) >= 48) def test_khanacademy_topic(self): @@ -203,9 +205,9 @@ def test_khanacademy_topic(self): ie = KhanAcademyIE(dl) result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') self.assertIsPlaylist(result) - 
self.assertEqual(result['id'], u'cryptography') - self.assertEqual(result['title'], u'Journey into cryptography') - self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?') + self.assertEqual(result['id'], 'cryptography') + self.assertEqual(result['title'], 'Journey into cryptography') + self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') self.assertTrue(len(result['entries']) >= 3) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 16926b4d39..f40769eac0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -9,18 +11,18 @@ class ImdbIE(InfoExtractor): - IE_NAME = u'imdb' - IE_DESC = u'Internet Movie Database trailers' + IE_NAME = 'imdb' + IE_DESC = 'Internet Movie Database trailers' _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' _TEST = { - u'url': u'http://www.imdb.com/video/imdb/vi2524815897', - u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068', - u'info_dict': { - u'id': u'2524815897', - u'ext': u'mp4', - u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb', - u'description': u'md5:9061c2219254e5d14e03c25c98e96a81', + 'url': 'http://www.imdb.com/video/imdb/vi2524815897', + 'md5': '9f34fa777ade3a6e57a054fdbcb3a068', + 'info_dict': { + 'id': '2524815897', + 'ext': 'mp4', + 'title': 'Ice Age: Continental Drift Trailer (No. 
2) - IMDb', + 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } } @@ -37,10 +39,10 @@ def _real_extract(self, url): f_path = f_path.strip() format_page = self._download_webpage( compat_urlparse.urljoin(url, f_path), - u'Downloading info for %s format' % f_id) + 'Downloading info for %s format' % f_id) json_data = self._search_regex( r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>', - format_page, u'json data', flags=re.DOTALL) + format_page, 'json data', flags=re.DOTALL) info = json.loads(json_data) format_info = info['videoPlayerObject']['video'] formats.append({ @@ -56,9 +58,10 @@ def _real_extract(self, url): 'thumbnail': format_info['slate'], } + class ImdbListIE(InfoExtractor): - IE_NAME = u'imdb:list' - IE_DESC = u'Internet Movie Database lists' + IE_NAME = 'imdb:list' + IE_DESC = 'Internet Movie Database lists' _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' def _real_extract(self, url): @@ -66,13 +69,13 @@ def _real_extract(self, url): list_id = mobj.group('id') # RSS XML is sometimes malformed - rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS') - list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title') + rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, 'Downloading list RSS') + list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, 'list title') # Export is independent of actual author_id, but returns 404 if no author_id is provided. # However, passing dummy author_id seems to be enough. 
csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, - list_id, u'Downloading list CSV') + list_id, 'Downloading list CSV') entries = [] for item in csv.split('\n')[1:]: @@ -83,4 +86,4 @@ def _real_extract(self, url): if item_id.startswith('vi'): entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) - return self.playlist_result(entries, list_id, list_title) \ No newline at end of file + return self.playlist_result(entries, list_id, list_title)