[bloomberg] Fix ooyala url extraction

Added a helper method to InfoExtractor that searches for the ‘twitter:player’ meta property. OoyalaIE now also recognizes the ‘ec’ parameter in the URL as the embed code.
2024-11-27 18:41:29 +00:00 · 2014-01-29 18:03:32 +01:00 · 2014-01-29 18:03:32 +01:00 · 0c708f11cb
parent fb2a706d11
commit 0c708f11cb
3 changed files with 7 additions and 3 deletions
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -24,5 +24,5 @@ def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
+        ooyala_url = self._twitter_search_player(webpage)
-        return OoyalaIE._build_url_result(ooyala_code)
+        return self.url_result(ooyala_url, OoyalaIE.ie_key())
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -465,6 +465,10 @@ def _media_rating_search(self, html):
        }
        return RATING_TABLE.get(rating.lower(), None)
    def _twitter_search_player(self, html):
        return self._html_search_meta('twitter:player', html,
            'twitter card player')
    def _sort_formats(self, formats):
        if not formats:
            raise ExtractorError(u'No video formats found')
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -5,7 +5,7 @@
 from ..utils import unescapeHTML
 class OoyalaIE(InfoExtractor):
-    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video