[extractor/youtube] Bring back _extract_chapters_from_description

Closes #3886
This commit is contained in:
pukkandan 2022-05-29 01:00:41 +05:30
parent 52023f1291
commit 0fe51254cb
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -2715,6 +2715,21 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
for contents in content_list for contents in content_list
))), []) ))), [])
@staticmethod
def _extract_chapters_from_description(description, duration):
chapters = [{'start_time': 0}]
for timestamp, title in re.findall(
r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
start = parse_duration(timestamp)
if start and title and chapters[-1]['start_time'] < start < duration:
chapters[-1]['end_time'] = start
chapters.append({
'start_time': start,
'title': title,
})
chapters[-1]['end_time'] = duration
return chapters[1:]
def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration): def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
chapters = [] chapters = []
last_chapter = {'start_time': 0} last_chapter = {'start_time': 0}
@ -3668,6 +3683,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
info['chapters'] = ( info['chapters'] = (
self._extract_chapters_from_json(initial_data, duration) self._extract_chapters_from_json(initial_data, duration)
or self._extract_chapters_from_engagement_panel(initial_data, duration) or self._extract_chapters_from_engagement_panel(initial_data, duration)
or self._extract_chapters_from_description(video_description, duration)
or None) or None)
contents = traverse_obj( contents = traverse_obj(