From c6af2dd8e5a4ee71e7378d7ad12395dce658f7b3 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Sun, 19 Sep 2021 03:08:50 +0000 Subject: [PATCH] [SponsorBlock] Improve merge algorithm (#999) Authored by: nihil-admirari --- test/test_postprocessors.py | 34 ++++++++++- yt_dlp/postprocessor/modify_chapters.py | 75 +++++++++++++------------ 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 7d13687696..090c7b47b0 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -461,11 +461,23 @@ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored( self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) + def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.5], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2.5], ['c2']), cuts) + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ self._sponsor_chapter(1.5, 2.5, 'sponsor')] self._remove_marked_arrange_sponsors_test_impl( - chapters, self._chapters([1.5, 3, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) + chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ @@ -476,6 +488,26 @@ def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): chapters, self._chapters([1, 3, 4, 5], [ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) + def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): + chapters = self._chapters([4], ['c']) + [ + self._sponsor_chapter(1.5, 2, 'sponsor'), + self._sponsor_chapter(2, 4, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) + + def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): + self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' + chapters = self._chapters([10], ['c']) + [ + self._sponsor_chapter(2, 8, 'sponsor'), + self._sponsor_chapter(4, 6, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 4, 6, 8, 10], [ + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Sponsor', 'c' + ]), []) + def test_make_concat_opts_CommonCase(self): sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] expected = '''ffconcat version 1.0 diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 9a7ba8effe..2871e16d51 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -15,7 +15,7 @@ ) -_TINY_SPONSOR_OVERLAP_DURATION = 1 +_TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' @@ -50,7 +50,6 @@ def run(self, info): if not info.get('__real_download'): raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. ' 'Different chapters may have already been removed') - return [], info else: self.write_debug('Expected and actual durations mismatch') @@ -145,38 +144,15 @@ def excess_duration(c): new_chapters = [] - def chapter_length(c): - return c['end_time'] - c['start_time'] - - def original_uncut_chapter(c): - return '_was_cut' not in c and '_categories' not in c - def append_chapter(c): assert 'remove' not in c - length = chapter_length(c) - excess_duration(c) + length = c['end_time'] - c['start_time'] - excess_duration(c) # Chapter is completely covered by cuts or sponsors. if length <= 0: return start = new_chapters[-1]['end_time'] if new_chapters else 0 c.update(start_time=start, end_time=start + length) - # Append without checking for tininess to prevent having - # a completely empty chapter list. - if not new_chapters: - new_chapters.append(c) - return - old_c = new_chapters[-1] - # Merge with the previous if the chapter is tiny. - # Only tiny chapters resulting from a cut can be skipped. - # Chapters that were already tiny in the original list will be preserved. - if not original_uncut_chapter(c) and length < _TINY_SPONSOR_OVERLAP_DURATION: - old_c['end_time'] = c['end_time'] - # Previous tiny chapter was appended for the sake of preventing an empty chapter list. - # Replace it with the current one. - elif not original_uncut_chapter(old_c) and chapter_length(old_c) < _TINY_SPONSOR_OVERLAP_DURATION: - c['start_time'] = old_c['start_time'] - new_chapters[-1] = c - else: - new_chapters.append(c) + new_chapters.append(c) # Turn into a priority queue, index is a tie breaker. # Plain stack sorted by start_time is not enough: after splitting the chapter, @@ -275,10 +251,36 @@ def append_chapter(c): append_chapter(cur_chapter) cur_i, cur_chapter = i, c (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + return self._remove_tiny_rename_sponsors(new_chapters), cuts + + def _remove_tiny_rename_sponsors(self, chapters): + new_chapters = [] + for i, c in enumerate(chapters): + # Merge with the previous/next if the chapter is tiny. + # Only tiny chapters resulting from a cut can be skipped. + # Chapters that were already tiny in the original list will be preserved. + if (('_was_cut' in c or '_categories' in c) + and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION): + if not new_chapters: + # Prepend tiny chapter to the next one if possible. + if i < len(chapters) - 1: + chapters[i + 1]['start_time'] = c['start_time'] + continue + else: + old_c = new_chapters[-1] + if i < len(chapters) - 1: + next_c = chapters[i + 1] + # Not a typo: key names in old_c and next_c are really different. + prev_is_sponsor = 'categories' in old_c + next_is_sponsor = '_categories' in next_c + # Preferentially prepend tiny normals to normals and sponsors to sponsors. + if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor) + or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)): + next_c['start_time'] = c['start_time'] + continue + old_c['end_time'] = c['end_time'] + continue - i = -1 - for c in new_chapters.copy(): - i += 1 c.pop('_was_cut', None) cats = c.pop('_categories', None) if cats: @@ -292,12 +294,13 @@ def append_chapter(c): }) outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict - if i > 0 and c['title'] == new_chapters[i - 1]['title']: - new_chapters[i - 1]['end_time'] = c['end_time'] - new_chapters.pop(i) - i -= 1 - - return new_chapters, cuts + # Merge identically named sponsors. + if (new_chapters and 'categories' in new_chapters[-1] + and new_chapters[-1]['title'] == c['title']): + new_chapters[-1]['end_time'] = c['end_time'] + continue + new_chapters.append(c) + return new_chapters def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False): in_file = filename