mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
[LinkedInLearning] Add subtitles (#1077)
Authored by: Ashish0804 Closes #1072
This commit is contained in:
parent
e99b2d2771
commit
8dc831f715
|
@ -1,6 +1,7 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from itertools import zip_longest
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -8,6 +9,8 @@
|
|||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
srt_subtitles_timecode,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -86,6 +89,16 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||
},
|
||||
}
|
||||
|
||||
def json2srt(self, transcript_lines, duration=None):
    """Convert a list of transcript line dicts into SRT subtitle text.

    Each element of ``transcript_lines`` must provide 'transcriptStartAt'
    (divided by 1000 here before formatting, i.e. presumably milliseconds)
    and 'caption'. A cue ends where the following cue starts. The last cue
    ends at ``duration`` when that lies after its start, otherwise one
    second after its start.

    @param transcript_lines  list of dicts as described above
    @param duration          optional total duration, in the same unit as
                             the converted start times
    @returns                 SRT document as a string ('' for no lines)
    """
    cues = []
    # Pair every line with its successor; the final line is paired with None
    for index, (line_dict, next_dict) in enumerate(
            zip_longest(transcript_lines, transcript_lines[1:]), start=1):
        start_time = line_dict['transcriptStartAt'] / 1000
        if next_dict:
            end_time = next_dict['transcriptStartAt'] / 1000
        elif duration and duration > start_time:
            end_time = duration
        else:
            # Unknown duration, or one that would end the cue before it starts
            end_time = start_time + 1
        cues.append('%d\n%s --> %s\n%s\n' % (
            index,
            srt_subtitles_timecode(start_time),
            srt_subtitles_timecode(end_time),
            line_dict['caption']))
    # SRT requires a blank line between consecutive cues
    return '\n'.join(cues)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_slug, video_slug = self._match_valid_url(url).groups()
|
||||
|
||||
|
@ -101,6 +114,7 @@ def _real_extract(self, url):
|
|||
formats.append({
|
||||
'format_id': 'progressive-%dp' % height,
|
||||
'url': progressive_url,
|
||||
'ext': 'mp4',
|
||||
'height': height,
|
||||
'width': width,
|
||||
'source_preference': 1,
|
||||
|
@ -128,6 +142,14 @@ def _real_extract(self, url):
|
|||
# However, unless someone can confirm this, the old
|
||||
# behaviour is being kept as-is
|
||||
self._sort_formats(formats, ('res', 'source_preference'))
|
||||
subtitles = {}
|
||||
duration = int_or_none(video_data.get('durationInSeconds'))
|
||||
transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
|
||||
if transcript_lines:
|
||||
subtitles['en'] = [{
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(transcript_lines, duration)
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||
|
@ -135,7 +157,8 @@ def _real_extract(self, url):
|
|||
'formats': formats,
|
||||
'thumbnail': video_data.get('defaultThumbnail'),
|
||||
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
||||
'duration': int_or_none(video_data.get('durationInSeconds')),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue