mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-17 21:59:17 +00:00
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors
This commit is contained in:
parent
5d0f84d32c
commit
3793090b1b
84
youtube_dl/extractor/amp.py
Normal file
84
youtube_dl/extractor/amp.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AMPIE(InfoExtractor):
    """Shared base for extractors that read Akamai Adaptive Media Player feeds.

    Subclasses call ``_extract_feed_info`` with the URL of a JSON feed and get
    back an info dict built from the feed's ``channel`` -> ``item`` entry.
    """

    @staticmethod
    def _as_node_list(node):
        """Normalize a feed node to something iterable over entries.

        The feed stores a single entry as a bare dict and several entries as
        a list of dicts; a missing node is ``None``.

        :param node: ``None``, a dict, or a list of dicts
        :returns: ``[]`` for a falsy node, ``[node]`` for a dict, otherwise
            ``node`` unchanged
        """
        if not node:
            return []
        if isinstance(node, dict):
            return [node]
        return node

    def _get_media_node(self, item, name, default=None):
        """Look up a media field of a feed item.

        Tries, in order: ``media-<name>`` inside the item's ``media-group``
        (or inside the item itself when there is no group), ``media-<name>``
        on the item, and finally the plain ``<name>`` key on the item.

        :param item: the feed item dict
        :param name: field name without the ``media-`` prefix
        :param default: value returned when none of the lookups succeeds
        :returns: the first truthy value found, else ``default``
        """
        media_name = 'media-%s' % name
        media_group = item.get('media-group') or item
        return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download an AMP JSON feed and turn its item into an info dict.

        :param url: URL of the JSON feed
        :returns: dict with ``id``, ``title``, ``description``,
            ``thumbnails``, ``timestamp``, ``duration``, ``subtitles`` and
            ``formats``
        :raises KeyError: if the feed lacks ``channel``/``item``/``guid`` or
            an entry lacks its required ``@attributes`` fields
        """
        item = self._download_json(
            url, None,
            'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed'
        )['channel']['item']

        video_id = item['guid']

        thumbnails = []
        for thumbnail_data in self._as_node_list(
                self._get_media_node(item, 'thumbnail')):
            thumbnail = thumbnail_data['@attributes']
            thumbnails.append({
                # presumably resolves scheme-less '//host/...' URLs to http
                'url': self._proto_relative_url(thumbnail['url'], 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in self._as_node_list(
                self._get_media_node(item, 'subTitle')):
            subtitle = subtitle_data['@attributes']
            # entries without a language tag are filed under 'en'
            lang = subtitle.get('lang') or 'en'
            subtitles[lang] = [{'url': subtitle['href']}]

        formats = []
        # A feed without any content node yields an empty list here instead
        # of crashing on iteration over None.
        media_content = self._as_node_list(
            self._get_media_node(item, 'content'))
        for media_data in media_content:
            media = media_data['@attributes']
            media_type = media['type']
            if media_type == 'video/f4m':
                # fatal=False: the result is checked before extending, since
                # a failed manifest download may not produce a list
                f4m_formats = self._extract_f4m_formats(
                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media['url'], video_id, m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                # Any other type is treated as a directly downloadable file.
                # The category label is optional metadata, so a missing one
                # must not abort extraction.
                category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': (category.get('@attributes') or {}).get('label'),
                    'url': media['url'],
                    'preference': 1,
                    'vbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        self._sort_formats(formats)

        # Duration is read from the first content entry, when there is one.
        duration = None
        if media_content:
            duration = int_or_none(
                media_content[0].get('@attributes', {}).get('duration'))

        return {
            'id': video_id,
            'title': self._get_media_node(item, 'title'),
            'description': self._get_media_node(item, 'description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': duration,
            # previously computed but dropped from the result
            'subtitles': subtitles,
            'formats': formats,
        }
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .amp import AMPIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
@ -19,7 +19,7 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DramaFeverBaseIE(InfoExtractor):
|
class DramaFeverBaseIE(AMPIE):
|
||||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||||
_NETRC_MACHINE = 'dramafever'
|
_NETRC_MACHINE = 'dramafever'
|
||||||
|
|
||||||
|
@ -80,60 +80,24 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||||
'timestamp': 1404336058,
|
'timestamp': 1404336058,
|
||||||
'upload_date': '20140702',
|
'upload_date': '20140702',
|
||||||
'duration': 343,
|
'duration': 343,
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).replace('/', '.')
|
video_id = self._match_id(url).replace('/', '.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
feed = self._download_json(
|
info = self._extract_feed_info('http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
|
|
||||||
video_id, 'Downloading episode JSON')['channel']['item']
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError):
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Currently unavailable in your country.', expected=True)
|
'Currently unavailable in your country.', expected=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
media_group = feed.get('media-group', {})
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for media_content in media_group['media-content']:
|
|
||||||
src = media_content.get('@attributes', {}).get('url')
|
|
||||||
if not src:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(src)
|
|
||||||
if ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
src, video_id, f4m_id='hds'))
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
src, video_id, 'mp4', m3u8_id='hls'))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': src,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = media_group.get('media-title')
|
|
||||||
description = media_group.get('media-description')
|
|
||||||
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
|
|
||||||
thumbnail = self._proto_relative_url(
|
|
||||||
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
|
|
||||||
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for media_subtitle in media_group.get('media-subTitle', []):
|
|
||||||
lang = media_subtitle.get('@attributes', {}).get('lang')
|
|
||||||
href = media_subtitle.get('@attributes', {}).get('href')
|
|
||||||
if not lang or not href:
|
|
||||||
continue
|
|
||||||
subtitles[lang] = [{
|
|
||||||
'ext': 'ttml',
|
|
||||||
'url': href,
|
|
||||||
}]
|
|
||||||
|
|
||||||
series_id, episode_number = video_id.split('.')
|
series_id, episode_number = video_id.split('.')
|
||||||
episode_info = self._download_json(
|
episode_info = self._download_json(
|
||||||
# We only need a single episode info, so restricting page size to one episode
|
# We only need a single episode info, so restricting page size to one episode
|
||||||
|
@ -146,21 +110,12 @@ def _real_extract(self, url):
|
||||||
if value:
|
if value:
|
||||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||||
subtitles.setdefault('English', []).append({
|
info['subtitiles'].setdefault('English', []).append({
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'url': subfile,
|
'url': subfile,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return info
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||||
|
|
|
@ -2,14 +2,14 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .amp import AMPIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FoxNewsIE(InfoExtractor):
|
class FoxNewsIE(AMPIE):
|
||||||
IE_DESC = 'Fox News and Fox Business Video'
|
IE_DESC = 'Fox News and Fox Business Video'
|
||||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
@ -20,10 +20,10 @@ class FoxNewsIE(InfoExtractor):
|
||||||
'id': '3937480',
|
'id': '3937480',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Frozen in Time',
|
'title': 'Frozen in Time',
|
||||||
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
|
'description': '16-year-old girl is size of toddler',
|
||||||
'duration': 265,
|
'duration': 265,
|
||||||
'timestamp': 1304411491,
|
#'timestamp': 1304411491,
|
||||||
'upload_date': '20110503',
|
#'upload_date': '20110503',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -34,10 +34,10 @@ class FoxNewsIE(InfoExtractor):
|
||||||
'id': '3922535568001',
|
'id': '3922535568001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
||||||
'description': "Congressman discusses the president's executive action",
|
'description': "Congressman discusses president's plan",
|
||||||
'duration': 292,
|
'duration': 292,
|
||||||
'timestamp': 1417662047,
|
#'timestamp': 1417662047,
|
||||||
'upload_date': '20141204',
|
#'upload_date': '20141204',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -56,48 +56,6 @@ def _real_extract(self, url):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
host = mobj.group('host')
|
host = mobj.group('host')
|
||||||
|
|
||||||
video = self._download_json(
|
info = self._extract_feed_info('http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
|
||||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
|
info['id'] = video_id
|
||||||
|
return info
|
||||||
item = video['channel']['item']
|
|
||||||
title = item['title']
|
|
||||||
description = item['description']
|
|
||||||
timestamp = parse_iso8601(item['dc-date'])
|
|
||||||
|
|
||||||
media_group = item['media-group']
|
|
||||||
duration = None
|
|
||||||
formats = []
|
|
||||||
for media in media_group['media-content']:
|
|
||||||
attributes = media['@attributes']
|
|
||||||
video_url = attributes['url']
|
|
||||||
if video_url.endswith('.f4m'):
|
|
||||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
|
|
||||||
elif video_url.endswith('.m3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
|
|
||||||
elif not video_url.endswith('.smil'):
|
|
||||||
duration = int_or_none(attributes.get('duration'))
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': media['media-category']['@attributes']['label'],
|
|
||||||
'preference': 1,
|
|
||||||
'vbr': int_or_none(attributes.get('bitrate')),
|
|
||||||
'filesize': int_or_none(attributes.get('fileSize'))
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
media_thumbnail = media_group['media-thumbnail']['@attributes']
|
|
||||||
thumbnails = [{
|
|
||||||
'url': media_thumbnail['url'],
|
|
||||||
'width': int_or_none(media_thumbnail.get('width')),
|
|
||||||
'height': int_or_none(media_thumbnail.get('height')),
|
|
||||||
}] if media_thumbnail else []
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue