[bloomberg] Fix ooyala url extraction

Added a helper method to InfoExtractor for searching the ‘twitter:player’ meta property.
OoyalaIE now also recognizes the ‘ec’ parameter in the URL as the embed code.
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-01-29 18:03:32 +01:00
parent fb2a706d11
commit 0c708f11cb
3 changed files with 7 additions and 3 deletions

View file

@ -24,5 +24,5 @@ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
name = mobj.group('name') name = mobj.group('name')
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url') ooyala_url = self._twitter_search_player(webpage)
return OoyalaIE._build_url_result(ooyala_code) return self.url_result(ooyala_url, OoyalaIE.ie_key())

View file

@ -465,6 +465,10 @@ def _media_rating_search(self, html):
} }
return RATING_TABLE.get(rating.lower(), None) return RATING_TABLE.get(rating.lower(), None)
def _twitter_search_player(self, html):
    """Search *html* for the 'twitter:player' meta property.

    Delegates to ``self._html_search_meta`` with the display name
    'twitter card player' and returns its result unchanged.
    NOTE(review): the result is presumably the player URL advertised by
    the page's Twitter card -- confirm against ``_html_search_meta``.
    """
    player = self._html_search_meta(
        'twitter:player', html, 'twitter card player')
    return player
def _sort_formats(self, formats): def _sort_formats(self, formats):
if not formats: if not formats:
raise ExtractorError(u'No video formats found') raise ExtractorError(u'No video formats found')

View file

@ -5,7 +5,7 @@
from ..utils import unescapeHTML from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor): class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)' _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = { _TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video