[theplatform] Use subtitles from SMIL, too

This commit is contained in:
Yen Chi Hsuan 2015-08-21 01:37:43 +08:00
parent 912e0b7e46
commit c687ac745b

View file

@ -28,7 +28,7 @@
class ThePlatformBaseIE(InfoExtractor): class ThePlatformBaseIE(InfoExtractor):
def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloading SMIL data'): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(smil_url, video_id, note=note) meta = self._download_xml(smil_url, video_id, note=note)
try: try:
error_msg = next( error_msg = next(
@ -54,7 +54,9 @@ def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloadin
self._sort_formats(formats) self._sort_formats(formats)
return formats subtitles = self._parse_smil_subtitles(meta, default_ns)
return formats, subtitles
def get_metadata(self, path, video_id): def get_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
@ -208,12 +210,14 @@ def _real_extract(self, url):
if sig: if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
formats = self._extract_theplatform_smil_formats(smil_url, video_id) formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
ret = self.get_metadata(path, video_id) ret = self.get_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({ ret.update({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': combined_subtitles,
}) })
return ret return ret
@ -251,6 +255,7 @@ def _real_extract(self, url):
entry = feed['entries'][0] entry = feed['entries'][0]
formats = [] formats = []
subtitles = {}
first_video_id = None first_video_id = None
duration = None duration = None
for item in entry['media$content']: for item in entry['media$content']:
@ -259,7 +264,9 @@ def _real_extract(self, url):
if first_video_id is None: if first_video_id is None:
first_video_id = cur_video_id first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration')) duration = float_or_none(item.get('plfile$duration'))
formats.extend(self._extract_theplatform_smil_formats(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)) cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
self._sort_formats(formats) self._sort_formats(formats)
@ -273,9 +280,11 @@ def _real_extract(self, url):
categories = [item['media$name'] for item in entry.get('media$categories', [])] categories = [item['media$name'] for item in entry.get('media$categories', [])]
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id) ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
ret.update({ ret.update({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': duration, 'duration': duration,
'timestamp': timestamp, 'timestamp': timestamp,