Improve chapter sanitization

This commit is contained in:
pukkandan 2022-07-07 10:51:47 +05:30
parent 385f7f3895
commit a3976e0760
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 10 additions and 7 deletions

View file

@@ -2377,13 +2377,18 @@ def sanitize_numeric_fields(info):
self.report_warning('"duration" field is negative, there is an error in extractor') self.report_warning('"duration" field is negative, there is an error in extractor')
chapters = info_dict.get('chapters') or [] chapters = info_dict.get('chapters') or []
if chapters and chapters[0].get('start_time'):
chapters.insert(0, {'start_time': 0})
dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')} dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
for prev, current, next_ in zip( for idx, (prev, current, next_) in enumerate(zip(
(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)): (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
if current.get('start_time') is None: if current.get('start_time') is None:
current['start_time'] = prev.get('end_time') current['start_time'] = prev.get('end_time')
if not current.get('end_time'): if not current.get('end_time'):
current['end_time'] = next_.get('start_time') current['end_time'] = next_.get('start_time')
if not current.get('title'):
current['title'] = f'<Untitled Chapter {idx}>'
if 'playlist' not in info_dict: if 'playlist' not in info_dict:
# It isn't part of a playlist # It isn't part of a playlist

View file

@@ -2764,17 +2764,15 @@ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration,
if not strict: if not strict:
chapter_list.sort(key=lambda c: c['start_time'] or 0) chapter_list.sort(key=lambda c: c['start_time'] or 0)
chapters = [{'start_time': 0, 'title': '<Untitled>'}] chapters = [{'start_time': 0}]
for idx, chapter in enumerate(chapter_list): for idx, chapter in enumerate(chapter_list):
if chapter['start_time'] is None or not chapter['title']: if chapter['start_time'] is None:
self.report_warning(f'Incomplete chapter {idx}') self.report_warning(f'Incomplete chapter {idx}')
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
chapters[-1]['end_time'] = chapter['start_time']
chapters.append(chapter) chapters.append(chapter)
else: else:
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"') self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
chapters[-1]['end_time'] = duration return chapters[1:]
return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
def _extract_comment(self, comment_renderer, parent=None): def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId') comment_id = comment_renderer.get('commentId')