Move common code for extractors based on MTV services to a new base class

Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
2024-11-16 13:23:20 +00:00 · 2013-12-03 14:58:24 +01:00 · 2013-12-03 14:58:24 +01:00 · 84db81815a
parent fb7abb31af
commit 84db81815a
4 changed files with 48 additions and 60 deletions
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -1,7 +1,7 @@
 import re
 from .common import InfoExtractor
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
@ -11,7 +11,7 @@
 )
-class ComedyCentralIE(MTVIE):
+class ComedyCentralIE(MTVServicesInfoExtractor):
    _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
    # Overwrite MTVIE properties we don't want
    _TESTS = []
    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the thumbnail URL stored in the feed item, or None.

        The URL is read from the 'url' attribute of the thumbnail element
        nested in the group element (both tag names are built with
        _media_xml_tag). The `uri` argument is not used here.

        Returns None when the item carries no such thumbnail element,
        instead of failing with an AttributeError on the missing node.
        """
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        thumb_node = itemdoc.find(search_path)
        # Not every item is guaranteed to ship a thumbnail; treat it as optional.
        if thumb_node is None:
            return None
        return thumb_node.attrib['url']
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@ -1,13 +1,11 @@
 import re
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
-class GametrailersIE(MTVIE):
+
-    """
+class GametrailersIE(MTVServicesInfoExtractor):
    Gametrailers uses the same video system as MTVIE; it only changes the feed
    url, where the uri is found and the method used to get the thumbnails.
    """
    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
    _TEST = {
        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@ -17,15 +15,9 @@ class GametrailersIE(MTVIE):
            u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
        },
    }
    # Overwrite MTVIE properties we don't want
    _TESTS = []
    _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the thumbnail URL stored in the feed item, or None.

        The URL is read from the 'url' attribute of the thumbnail element
        nested in the group element (both tag names are built with
        _media_xml_tag). The `uri` argument is not used here.

        Returns None when the item carries no such thumbnail element,
        instead of failing with an AttributeError on the missing node.
        """
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        thumb_node = itemdoc.find(search_path)
        # Not every item is guaranteed to ship a thumbnail; treat it as optional.
        if thumb_node is None:
            return None
        return thumb_node.attrib['url']
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -10,35 +10,8 @@
 def _media_xml_tag(tag):
    return '{http://search.yahoo.com/mrss/}%s' % tag
 class MTVIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
    _TESTS = [
        {
            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
            u'file': u'853555.mp4',
            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
            u'info_dict': {
                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
            },
        },
        {
            u'add_ie': ['Vevo'],
            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
            u'file': u'USCJY1331283.mp4',
            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
            u'info_dict': {
                u'title': u'Everything Has Changed',
                u'upload_date': u'20130606',
                u'uploader': u'Taylor Swift',
            },
            u'skip': u'VEVO is only available in some countries',
        },
    ]
 class MTVServicesInfoExtractor(InfoExtractor):
    @staticmethod
    def _id_from_uri(uri):
        return uri.split(':')[-1]
@ -53,7 +26,12 @@ def _transform_rtmp_url(rtmp_video_url):
        return base + m.group('finalid')
    def _get_thumbnail_url(self, uri, itemdoc):
-        return 'http://mtv.mtvnimages.com/uri/' + uri
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        thumb_node = itemdoc.find(search_path)
        if thumb_node is None:
            return None
        else:
            return thumb_node.attrib['url']
    def _extract_video_formats(self, metadataXml):
        if '/error_country_block.swf' in metadataXml:
@ -108,6 +86,39 @@ def _get_videos_info(self, uri):
                                         u'Downloading info')
        return [self._get_video_info(item) for item in idoc.findall('.//item')]
 class MTVIE(MTVServicesInfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
    _TESTS = [
        {
            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
            u'file': u'853555.mp4',
            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
            u'info_dict': {
                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
            },
        },
        {
            u'add_ie': ['Vevo'],
            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
            u'file': u'USCJY1331283.mp4',
            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
            u'info_dict': {
                u'title': u'Everything Has Changed',
                u'upload_date': u'20130606',
                u'uploader': u'Taylor Swift',
            },
            u'skip': u'VEVO is only available in some countries',
        },
    ]
    def _get_thumbnail_url(self, uri, itemdoc):
        return 'http://mtv.mtvnimages.com/uri/' + uri
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@ -1,15 +1,14 @@
 import re
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
-class SouthParkStudiosIE(MTVIE):
+class SouthParkStudiosIE(MTVServicesInfoExtractor):
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
    # Overwrite MTVIE properties we don't want
    _TESTS = [{
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
        },
    }]
    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the item's thumbnail URL, or None when it has no thumbnail.

        Looks up the thumbnail element nested in the group element (tag names
        built with _media_xml_tag) and reads its 'url' attribute. The `uri`
        argument is not used here.
        """
        group_tag = _media_xml_tag('group')
        thumbnail_tag = _media_xml_tag('thumbnail')
        node = itemdoc.find('%s/%s' % (group_tag, thumbnail_tag))
        return node.attrib['url'] if node is not None else None
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        url = u'http://www.' + mobj.group(u'url')