[foxnews] Add support for iframe embeds (closes #15810, closes #16711)

This commit is contained in:
Sergey M․ 2018-06-20 23:51:14 +07:00
parent c9b983ff82
commit f51f526b0a
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 41 additions and 7 deletions

View file

@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
}, },
] ]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups() host, video_id = re.match(self._VALID_URL, url).groups()
@ -71,18 +79,35 @@ class FoxNewsArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:article' IE_NAME = 'foxnews:article'
_TEST = { _TESTS = [{
# data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': '62aa5a781b308fdee212ebb6f33ae7ef', 'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'info_dict': { 'info_dict': {
'id': '5116295019001', 'id': '5116295019001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War', 'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'', 'description': 'Veterans react on \'The Kelly File\'',
'timestamp': 1473299755, 'timestamp': 1473301045,
'upload_date': '20160908', 'upload_date': '20160908',
}, },
} }, {
# iframe embed
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'info_dict': {
'id': '5748266721001',
'ext': 'flv',
'title': 'Kyle Kashuv has a positive message for the Trump White House',
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 229,
'timestamp': 1520594670,
'upload_date': '20180309',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -90,10 +115,13 @@ def _real_extract(self, url):
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
webpage, 'video ID', group='id') webpage, 'video ID', group='id', default=None)
if video_id:
return self.url_result(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
return self.url_result( return self.url_result(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
FoxNewsIE.ie_key())
class FoxNewsInsiderIE(InfoExtractor): class FoxNewsInsiderIE(InfoExtractor):

View file

@ -111,6 +111,7 @@
from .peertube import PeerTubeIE from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -3091,6 +3092,11 @@ def _real_extract(self, url):
return self.playlist_from_matches( return self.playlist_from_matches(
apa_urls, video_id, video_title, ie=APAIE.ie_key()) apa_urls, video_id, video_title, ie=APAIE.ie_key())
foxnews_urls = FoxNewsIE._extract_urls(webpage)
if foxnews_urls:
return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer( sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)] webpage)]