From f4b052321ba957e11da8e5abda1c2a5b059955ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 5 Sep 2013 22:38:23 +0200
Subject: [PATCH] [youtube] Urls like youtube.com/NASA are now interpreted as users (fixes #1069)

Video urls like http://youtube.com/BaW_jenozKc are not valid, but http://youtu.be/BaW_jenozKc is correct.

YoutubeUserIE.suitable is declared as a classmethod so that it can be
called on the class itself, the same way the tests call suitable() on
the other extractor classes.
---
 test/test_all_urls.py           | 29 ++++++++++++++++++++---------
 youtube_dl/extractor/youtube.py | 16 +++++++++++++---
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index c54faa380e..fe4090d187 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -11,6 +11,15 @@ from helper import get_testcases
 
 
 class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
     def test_youtube_playlist_matching(self):
         self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
         self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
@@ -24,12 +33,17 @@ def test_youtube_playlist_matching(self):
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@@ -63,15 +77,12 @@ def test_no_duplicates(self):
                     self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
 
     def test_keywords(self):
-        ies = gen_extractors()
-        matching_ies = lambda url: [ie.IE_NAME for ie in ies
-                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
-        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':thedailyshow', ['ComedyCentral'])
+        self.assertMatch(':tds', ['ComedyCentral'])
+        self.assertMatch(':colbertreport', ['ComedyCentral'])
+        self.assertMatch(':cr', ['ComedyCentral'])
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 782cb1cfb5..1facf1cc51 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -135,7 +135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     _VALID_URL = r"""^
                      (
                          (?:https?://)?                                       # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
@@ -146,7 +146,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
                              )
-                         )?                                                   # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                          # just youtu.be/xxxx
+                         )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                      (?(1).+)?                                                # if we found the ID, everything can follow
@@ -1013,13 +1015,21 @@ def _real_extract(self, url):
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
     _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
     IE_NAME = u'youtube:user'
 
+    @classmethod
+    def suitable(cls, url):
+        # The relaxed pattern can also match video URLs handled by YoutubeIE
+        # (e.g. youtube.com/v/<id>), so those are rejected here.
+        if YoutubeIE.suitable(url):
+            return False
+        return super(YoutubeUserIE, cls).suitable(url)
+
     def _real_extract(self, url):
         # Extract username
         mobj = re.match(self._VALID_URL, url)