[udemy] Extract asset captions

This commit is contained in:
Sergey M․ 2018-05-08 22:57:01 +07:00
parent 0ce76801e8
commit 2fbd86352e
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -18,6 +18,7 @@
int_or_none,
js_to_json,
sanitized_Request,
try_get,
unescapeHTML,
urlencode_postdata,
)
@@ -105,7 +106,7 @@ def _download_lecture(self, course_id, lecture_id):
% (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,data',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
})
def _handle_error(self, response):
@@ -308,6 +309,21 @@ def extract_subtitles(track_list):
if isinstance(urls, dict):
extract_formats(urls.get('Video'))
captions = asset.get('captions')
if isinstance(captions, list):
for cc in captions:
if not isinstance(cc, dict):
continue
cc_url = cc.get('url')
if not cc_url or not isinstance(cc_url, compat_str):
continue
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
sub_dict = (automatic_captions if cc.get('source') == 'auto'
else subtitles)
sub_dict.setdefault(lang or 'en', []).append({
'url': cc_url,
})
view_html = lecture.get('view_html')
if view_html:
view_html_urls = set()