From befd88b786dc41ff075693fd17bafbc7fa4c100e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 29 Nov 2013 15:25:43 +0100
Subject: [PATCH] [yahoo] Add an extractor for yahoo news (closes #1849)

---
Notes (ignored by git-am):
  * The YQL stream query is moved out of YahooIE._real_extract into a new
    YahooIE._get_info(long_id, video_id) method so that subclasses can call it.
  * The query gains an extra ' AND region="US"' condition.
  * YahooNewsIE matches news.yahoo.com/video/ URLs, reads the long content id
    from the "contentId: '...'" field of the web page and delegates to
    _get_info.

 youtube_dl/extractor/__init__.py |  6 +++++-
 youtube_dl/extractor/yahoo.py    | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fd890e251..664639b53 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -172,7 +172,11 @@
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeIE
-from .yahoo import YahooIE, YahooSearchIE
+from .yahoo import (
+    YahooIE,
+    YahooNewsIE,
+    YahooSearchIE,
+)
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youporn import YouPornIE
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 617e3bb06..2d87e81b2 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -53,8 +53,11 @@ def _real_extract(self, url):
         # The 'meta' field is not always in the video webpage, we request it
         # from another page
         long_id = info['id']
+        return self._get_info(info['id'], video_id)
+
+    def _get_info(self, long_id, video_id):
         query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
-                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
+                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"' % long_id)
         data = compat_urllib_parse.urlencode({
             'q': query,
             'env': 'prod',
@@ -100,6 +103,35 @@ def _real_extract(self, url):
         }
 
 
+class YahooNewsIE(YahooIE):
+    IE_NAME = 'yahoo:news'
+    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
+
+    _TEST = {
+        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+        u'info_dict': {
+            u'id': u'104538833',
+            u'ext': u'flv',
+            u'title': u'China Moses Is Crazy About the Blues',
+            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
+        },
+        u'params': {
+            # Requires rtmpdump
+            u'skip_download': True,
+        },
+    }
+
+    # Overwrite YahooIE properties we don't want
+    _TESTS = []
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
+        return self._get_info(long_id, video_id)
+
+
 class YahooSearchIE(SearchInfoExtractor):
     IE_DESC = u'Yahoo screen search'
     _MAX_RESULTS = 1000