[youtube] Add storyboard formats

Closes: #1553, https://github.com/ytdl-org/youtube-dl/issues/9868
Related: https://github.com/ytdl-org/youtube-dl/pull/14951
This commit is contained in:
pukkandan 2021-11-17 01:26:23 +05:30
parent d8cf8d97a8
commit 720c309932
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698

View file

@ -9,6 +9,7 @@
import hashlib import hashlib
import itertools import itertools
import json import json
import math
import os.path import os.path
import random import random
import re import re
@ -28,6 +29,7 @@
) )
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
bug_reports_message,
bytes_to_intlist, bytes_to_intlist,
clean_html, clean_html,
datetime_from_str, datetime_from_str,
@ -66,6 +68,10 @@
) )
def get_first(obj, keys, **kwargs):
    """Return a single traverse_obj() result for ``keys`` looked up beneath ``obj``.

    The lookup path is ``Ellipsis`` followed by ``keys`` (normalised through
    ``variadic``, so a lone key or a sequence of keys are both accepted —
    presumably; confirm against ``variadic``). ``get_all=False`` is always
    forced, so passing ``get_all`` in ``kwargs`` raises ``TypeError``.
    Any other keyword arguments are forwarded to ``traverse_obj`` unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
# any clients starting with _ cannot be explicitly requested by the user # any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = { INNERTUBE_CLIENTS = {
'web': { 'web': {
@ -2586,6 +2592,41 @@ def process_manifest_format(f, proto, itag):
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
yield f yield f
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format dict per level of the storyboard spec.

    The spec string is read from
    ``storyboards -> playerStoryboardSpecRenderer -> spec`` in the player
    responses and split on ``|``. The first field is the URL template; every
    following field describes one storyboard level as eight ``#``-separated
    values, of which the first five must be non-zero integers
    (width, height, frame count, columns, rows) and the last two are the
    ``$N`` substitution and the ``sigh`` signature.

    Nothing is yielded when the spec is missing/empty or when ``duration`` is
    unknown (``None``), because the per-fragment durations cannot be computed
    without it. Malformed levels are reported via ``report_warning`` and
    skipped.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading URL template.
    # NB: ''.split('|') == [''], so the list itself is never empty; the
    # emptiness check has to be made on the template that was popped.
    base_url = spec.pop()
    if not base_url or duration is None:
        return
    # Levels are iterated in reverse order; L - i recovers the original index
    # that the '$L' placeholder expects.
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        # all() rejects both unparsable (None) and zero values, which also
        # protects the divisions below from a zero cols/rows/frame count.
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        # The sixth field (args[5]) is not used here.
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a cols x rows sheet of frames; the count may be
        # fractional when the last sheet is only partially filled.
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The final fragment only covers whatever time remains.
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url) video_id = self._match_id(url)
@ -2603,8 +2644,6 @@ def _real_extract(self, url):
self._get_requested_clients(url, smuggled_data), self._get_requested_clients(url, smuggled_data),
video_id, webpage, master_ytcfg) video_id, webpage, master_ytcfg)
get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
playability_statuses = traverse_obj( playability_statuses = traverse_obj(
player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[]) player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
@ -2700,10 +2739,6 @@ def feed_entry(name):
if reason: if reason:
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
# Source is given priority since formats that throttle are given lower source_preference
# When throttling issue is fully fixed, remove this
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
if not keywords and webpage: if not keywords and webpage:
keywords = [ keywords = [
@ -2791,6 +2826,12 @@ def feed_entry(name):
if not duration and live_endtime and live_starttime: if not duration and live_endtime and live_starttime:
duration = live_endtime - live_starttime duration = live_endtime - live_starttime
formats.extend(self._extract_storyboard(player_responses, duration))
# Source is given priority since formats that throttle are given lower source_preference
# When throttling issue is fully fixed, remove this
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
info = { info = {
'id': video_id, 'id': video_id,
'title': self._live_title(video_title) if is_live else video_title, 'title': self._live_title(video_title) if is_live else video_title,