[channel9] Extract more formats

This commit is contained in:
Sergey M․ 2017-03-23 23:47:43 +07:00
parent d0572557c2
commit a5d783f525
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -9,6 +9,7 @@
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
clean_html, clean_html,
qualities,
) )
@ -120,22 +121,75 @@ def _real_extract(self, url):
content_data = self._download_json(content_url, content_id) content_data = self._download_json(content_url, content_id)
title = content_data['Title'] title = content_data['Title']
QUALITIES = (
'mp3',
'wmv', 'mp4',
'wmv-low', 'mp4-low',
'wmv-mid', 'mp4-mid',
'wmv-high', 'mp4-high',
)
quality_key = qualities(QUALITIES)
def quality(quality_id, format_url):
return (len(QUALITIES) if '_Source.' in format_url
else quality_key(quality_id))
formats = [] formats = []
qualities = [ urls = set()
'VideoMP4Low',
'VideoWMV', SITE_QUALITIES = {
'VideoMP4Medium', 'MP3': 'mp3',
'VideoMP4High', 'MP4': 'mp4',
'VideoWMVHQ', 'Low Quality WMV': 'wmv-low',
] 'Low Quality MP4': 'mp4-low',
for q in qualities: 'Mid Quality WMV': 'wmv-mid',
q_url = content_data.get(q) 'Mid Quality MP4': 'mp4-mid',
if not q_url: 'High Quality WMV': 'wmv-high',
'High Quality MP4': 'mp4-high',
}
formats_select = self._search_regex(
r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
'formats select', default=None)
if formats_select:
for mobj in re.finditer(
r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
formats_select):
format_url = mobj.group('url')
if format_url in urls:
continue continue
urls.add(format_url)
format_id = mobj.group('format')
quality_id = SITE_QUALITIES.get(format_id, format_id)
formats.append({ formats.append({
'format_id': q, 'url': format_url,
'url': q_url, 'format_id': quality_id,
'quality': quality(quality_id, format_url),
'vcodec': 'none' if quality_id == 'mp3' else None,
}) })
API_QUALITIES = {
'VideoMP4Low': 'mp4-low',
'VideoWMV': 'wmv-mid',
'VideoMP4Medium': 'mp4-mid',
'VideoMP4High': 'mp4-high',
'VideoWMVHQ': 'wmv-hq',
}
for format_id, q in API_QUALITIES.items():
q_url = content_data.get(format_id)
if not q_url or q_url in urls:
continue
urls.add(q_url)
formats.append({
'url': q_url,
'format_id': q,
'quality': quality(q, q_url),
})
self._sort_formats(formats)
slides = content_data.get('Slides') slides = content_data.get('Slides')
zip_file = content_data.get('ZipFile') zip_file = content_data.get('ZipFile')