[youtube] refactor itag processing

This commit is contained in:
pukkandan 2021-10-31 13:26:44 +05:30
parent da48320075
commit a0bb6ce58d
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698

View file

@@ -2434,7 +2434,7 @@ def append_client(client_name):
return prs, player_url return prs, player_url
def _extract_formats(self, streaming_data, video_id, player_url, is_live): def _extract_formats(self, streaming_data, video_id, player_url, is_live):
itags, stream_ids = [], [] itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {} itag_qualities, res_qualities = {}, {}
q = qualities([ q = qualities([
# Normally tiny is the smallest video-only formats. But # Normally tiny is the smallest video-only formats. But
@@ -2498,7 +2498,7 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live):
throttled = True throttled = True
if itag: if itag:
itags.append(itag) itags[itag] = 'https'
stream_ids.append(stream_id) stream_ids.append(stream_id)
tbr = float_or_none( tbr = float_or_none(
@@ -2548,46 +2548,36 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live):
and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)) and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
def guess_quality(f): def process_manifest_format(f, proto, itag):
for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): if itag in itags:
if val in qdict: if itags[itag] == proto or f'{itag}-{proto}' in itags:
return q(qdict[val]) return False
return -1 itag = f'{itag}-{proto}'
if itag:
f['format_id'] = itag
itags[itag] = proto
f['quality'] = next((
q(qdict[val])
for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities))
if val in qdict), -1)
return True
for sd in streaming_data: for sd in streaming_data:
hls_manifest_url = get_hls and sd.get('hlsManifestUrl') hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
if hls_manifest_url: if hls_manifest_url:
for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
itag = self._search_regex( if process_manifest_format(f, 'hls', self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None) r'/itag/(\d+)', f['url'], 'itag', default=None)):
if itag in itags: yield f
itag += '-hls'
if itag in itags:
continue
if itag:
f['format_id'] = itag
itags.append(itag)
f['quality'] = guess_quality(f)
yield f
dash_manifest_url = get_dash and sd.get('dashManifestUrl') dash_manifest_url = get_dash and sd.get('dashManifestUrl')
if dash_manifest_url: if dash_manifest_url:
for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
itag = f['format_id'] if process_manifest_format(f, 'dash', f['format_id']):
if itag in itags: f['filesize'] = int_or_none(self._search_regex(
itag += '-dash' r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if itag in itags: yield f
continue
if itag:
f['format_id'] = itag
itags.append(itag)
f['quality'] = guess_quality(f)
filesize = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None))
if filesize:
f['filesize'] = filesize
yield f
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})