From 082703347946949a93814f62e783cddf80d41482 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 2 Dec 2020 21:37:14 +0100 Subject: [PATCH 1/2] [extractor/common] improve Akamai HTTP format extraction - Allow m3u8 manifest without an additional audio format - Fix extraction for qualities starting with a number Solution provided by @nixxo based on: https://stackoverflow.com/a/5984688 --- youtube_dlc/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index aacdf06fe1..a56465956c 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -2624,7 +2624,7 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+' qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') qualities_length = len(qualities) - if len(formats) in (qualities_length + 1, qualities_length * 2 + 1): + if len(formats) in (qualities_length, qualities_length + 1, qualities_length * 2, qualities_length * 2 + 1): i = 0 http_formats = [] for f in formats: @@ -2633,7 +2633,7 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): http_f = f.copy() del http_f['manifest_url'] http_url = re.sub( - REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url']) + REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url']) http_f.update({ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'url': http_url, From 727006d9515441ae44dd034955fd220d5afed9a6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Dec 2020 00:33:55 +0100 Subject: [PATCH 2/2] [extractor/commons] improve Akamai HTTP formats extraction --- youtube_dlc/extractor/common.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index a56465956c..a5df94e9c9 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -2615,20 +2615,20 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): hls_host = hosts.get('hls') if hls_host: m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url) - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False) + formats.extend(m3u8_formats) http_host = hosts.get('http') - if http_host and 'hdnea=' not in manifest_url: - REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+' + if http_host and m3u8_formats and 'hdnea=' not in m3u8_url: + REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+' qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') qualities_length = len(qualities) - if len(formats) in (qualities_length, qualities_length + 1, qualities_length * 2, qualities_length * 2 + 1): + if len(m3u8_formats) in (qualities_length, qualities_length + 1): i = 0 - http_formats = [] - for f in formats: - if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none': + for f in m3u8_formats: + if f['vcodec'] != 'none': for protocol in ('http', 'https'): http_f = f.copy() del http_f['manifest_url'] @@ -2639,9 +2639,8 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): 'url': http_url, 'protocol': protocol, }) - http_formats.append(http_f) + formats.append(http_f) i += 1 - formats.extend(http_formats) return formats