[pluralsight] Add support for subtitles (Closes #9681)

This commit is contained in:
Sergey M․ 2016-08-24 08:41:52 +07:00
parent c86f51ee38
commit 8c3e35dd44
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -1,9 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json
import random
import collections import collections
import json
import os
import random
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -12,10 +13,12 @@
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
parse_duration, parse_duration,
qualities, qualities,
sanitized_Request, sanitized_Request,
srt_subtitles_timecode,
urlencode_postdata, urlencode_postdata,
) )
@ -91,6 +94,51 @@ def _login(self):
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
    """Download the captions of a clip and expose them as subtitle tracks.

    The request parameters are POSTed as a JSON document to the
    ``training/Player/Captions`` endpoint under ``self._API_BASE``. The
    download is non-fatal: when it yields nothing truthy, ``None`` is
    returned.

    On success the result maps ``lang`` to two tracks built from the same
    response: the captions re-serialized as JSON ('json') and the output
    of ``self._convert_subtitles`` ('srt').
    """
    endpoint = '%s/training/Player/Captions' % self._API_BASE
    payload = json.dumps({
        'a': author,
        'cn': clip_id,
        'lc': lang,
        'm': name,
    }).encode('utf-8')
    captions = self._download_json(
        endpoint, video_id,
        'Downloading captions JSON', 'Unable to download captions JSON',
        fatal=False, data=payload,
        headers={'Content-Type': 'application/json;charset=utf-8'})
    if not captions:
        return None
    json_track = {
        'ext': 'json',
        'data': json.dumps(captions),
    }
    srt_track = {
        'ext': 'srt',
        'data': self._convert_subtitles(duration, captions),
    }
    return {lang: [json_track, srt_track]}
@staticmethod
def _convert_subtitles(duration, subs):
    """Render a sequence of caption entries as SRT-formatted text.

    Each entry of ``subs`` is read through ``.get('DisplayTimeOffset')``
    (start time) and ``.get('Text')`` (cue text). An entry carries no
    end time of its own: a cue ends where the next entry starts, and the
    last cue ends at ``duration``.
    NOTE(review): offsets and ``duration`` are presumably seconds, as
    they are handed to ``srt_subtitles_timecode`` -- confirm.

    Entries are skipped when the start time or text is missing, or when
    no end time can be determined (the next entry has no usable offset,
    or ``duration`` is None for the last cue). Previously a None end
    time was passed straight to ``srt_subtitles_timecode``.

    Returns the concatenated SRT cues as a single string ('' when
    nothing could be converted).
    """
    srt = ''
    last_index = len(subs) - 1
    for num, current in enumerate(subs):
        start = float_or_none(current.get('DisplayTimeOffset'))
        text = current.get('Text')
        if start is None or text is None:
            continue
        if num == last_index:
            end = duration
        else:
            end = float_or_none(subs[num + 1].get('DisplayTimeOffset'))
        if end is None:
            # Without an end time the cue cannot be formatted; drop it
            # rather than fail the whole conversion.
            continue
        srt += os.linesep.join(
            (
                # Cue counter keeps the entry's position in ``subs``.
                '%d' % num,
                '%s --> %s' % (
                    srt_subtitles_timecode(start),
                    srt_subtitles_timecode(end)),
                text,
                # Trailing separator yields the blank line between cues.
                os.linesep,
            ))
    return srt
def _real_extract(self, url): def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@ -138,6 +186,8 @@ def _real_extract(self, url):
if not clip: if not clip:
raise ExtractorError('Unable to resolve clip') raise ExtractorError('Unable to resolve clip')
title = '%s - %s' % (module['title'], clip['title'])
QUALITIES = { QUALITIES = {
'low': {'width': 640, 'height': 480}, 'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640}, 'medium': {'width': 848, 'height': 640},
@ -225,18 +275,20 @@ def guess_allowed_qualities():
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
# TODO: captions duration = int_or_none(
# http://www.pluralsight.com/training/Player/ViewClip + cap = true clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
# or
# http://www.pluralsight.com/training/Player/Captions # TODO: other languages?
# { a = author, cn = clip_id, lc = end, m = name } subtitles = self.extract_subtitles(
author, clip_id, 'en', name, duration, display_id)
return { return {
'id': clip.get('clipName') or clip['name'], 'id': clip.get('clipName') or clip['name'],
'title': '%s - %s' % (module['title'], clip['title']), 'title': title,
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), 'duration': duration,
'creator': author, 'creator': author,
'formats': formats 'formats': formats,
'subtitles': subtitles,
} }