diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 31e8f8244..3fee480f3 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1369,6 +1369,110 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
},
],
},
+ ), (
+ # Clear Key with CENC default_KID
+ 'clearkey_cenc',
+ 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', # mpd_url
+ 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/', # mpd_base_url
+ [{
+ 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
+ 'ext': 'mp4',
+ 'format_id': '1',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.64001f',
+ 'tbr': 389.802,
+ 'width': 512,
+ 'height': 288,
+ 'dash_cenc': {
+ 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
+ 'key_ids': ['9eb4050de44b4802932e27d75083e266'],
+ },
+ }, {
+ 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
+ 'ext': 'mp4',
+ 'format_id': '2',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.64001f',
+ 'tbr': 764.935,
+ 'width': 640,
+ 'height': 360,
+ 'dash_cenc': {
+ 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
+ 'key_ids': ['9eb4050de44b4802932e27d75083e266'],
+ },
+ }, {
+ 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
+ 'ext': 'mp4',
+ 'format_id': '3',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640028',
+ 'tbr': 1120.439,
+ 'width': 852,
+ 'height': 480,
+ 'dash_cenc': {
+ 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
+ 'key_ids': ['9eb4050de44b4802932e27d75083e266'],
+ },
+ }, {
+ 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
+ 'ext': 'mp4',
+ 'format_id': '4',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640032',
+ 'tbr': 1945.258,
+ 'width': 1280,
+ 'height': 720,
+ 'dash_cenc': {
+ 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
+ 'key_ids': ['9eb4050de44b4802932e27d75083e266'],
+ },
+ }, {
+ 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
+ 'ext': 'mp4',
+ 'format_id': '5',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640033',
+ 'tbr': 2726.377,
+ 'width': 1920,
+ 'height': 1080,
+ 'dash_cenc': {
+ 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
+ 'key_ids': ['9eb4050de44b4802932e27d75083e266'],
+ },
+ }],
+ {},
+ ), (
+ # default CENC KID overridden via W3C PSSH box, no license server in manifest
+ 'w3c_pssh',
+ 'https://unknown/manifest.mpd', # mpd_url
+ 'https://unknown/', # mpd_base_url
+ [{
+ 'manifest_url': 'https://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '1',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.64001f',
+ 'tbr': 389.802,
+ 'width': 512,
+ 'height': 288,
+ 'dash_cenc': {
+ 'key_ids': ['43215678123412341234123412341234'],
+ },
+ 'has_drm': True,
+ }],
+ {},
),
]
diff --git a/test/testdata/mpd/clearkey_cenc.mpd b/test/testdata/mpd/clearkey_cenc.mpd
new file mode 100644
index 000000000..40f212383
--- /dev/null
+++ b/test/testdata/mpd/clearkey_cenc.mpd
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+ https://drm-clearkey-testvectors.axtest.net/AcquireLicense
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/testdata/mpd/w3c_pssh.mpd b/test/testdata/mpd/w3c_pssh.mpd
new file mode 100644
index 000000000..d72cd866c
--- /dev/null
+++ b/test/testdata/mpd/w3c_pssh.mpd
@@ -0,0 +1,13 @@
+
+
+
+
+
+ AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAFDIVZ4EjQSNBI0EjQSNBI0AAAAAA==
+
+
+
+
+
+
+
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3430036f4..1069686a9 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -14,12 +14,14 @@ import netrc
import os
import random
import re
+import struct
import subprocess
import sys
import time
import types
import urllib.parse
import urllib.request
+import uuid
import xml.etree.ElementTree
from ..compat import (
@@ -258,6 +260,15 @@ class InfoExtractor:
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
+ * dash_cenc A dictionary of DASH CENC decryption information
+ used by the native DASH downloader when set.
+ * laurl The Clear Key license server URL from which
+ CENC keys will be downloaded.
+ * key_ids List of key IDs (as hex) to request from the Clear Key
+ license server.
+ * key The CENC key (as hex) used to decrypt fragments.
+ If `key` is given, any license server URL and
+ key IDs will be ignored.
RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time
@@ -2669,7 +2680,10 @@ class InfoExtractor:
assert 'is_dash_periods' not in f, 'format already processed'
f['is_dash_periods'] = True
format_key = tuple(v for k, v in f.items() if k not in (
- ('format_id', 'fragments', 'manifest_stream_number')))
+ ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc')))
+ if 'dash_cenc' in f:
+ format_key = format_key + tuple(
+ tuple(v) if isinstance(v, list) else v for v in f['dash_cenc'].values())
if format_key not in formats:
formats[format_key] = f
elif 'fragments' in f:
@@ -2703,8 +2717,18 @@ class InfoExtractor:
def _add_ns(path):
return self._xpath_ns(path, namespace)
- def is_drm_protected(element):
- return element.find(_add_ns('ContentProtection')) is not None
+ def extract_drm_info(element):  # returns a dict with optional 'dash_cenc' and 'has_drm' keys
+ has_drm = False
+ cenc_info = {}
+ for cp_e in element.findall(_add_ns('ContentProtection')):
+ has_drm = True  # any ContentProtection child marks the element as protected
+ self._extract_mpd_content_protection_info(cp_e, cenc_info)  # fills cenc_info in place
+ info = {'dash_cenc': cenc_info} if cenc_info else {}
+ if has_drm and not (  # flag as DRM unless a key, or a license URL plus key IDs, is known
+ cenc_info.get('key') or cenc_info.get('laurl') and cenc_info.get('key_ids')
+ ):
+ info['has_drm'] = True
+ return info
def extract_multisegment_info(element, ms_parent_info):
ms_info = ms_parent_info.copy()
@@ -2778,6 +2802,7 @@ class InfoExtractor:
'timescale': 1,
})
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
+ adaptation_set_drm_info = extract_drm_info(adaptation_set)
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')):
representation_attrib = adaptation_set.attrib.copy()
@@ -2864,8 +2889,8 @@ class InfoExtractor:
'acodec': 'none',
'vcodec': 'none',
}
- if is_drm_protected(adaptation_set) or is_drm_protected(representation):
- f['has_drm'] = True
+ f.update(adaptation_set_drm_info)
+ f.update(extract_drm_info(representation))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers):
@@ -3026,6 +3051,69 @@ class InfoExtractor:
period_entry['subtitles'][lang or 'und'].append(f)
yield period_entry
+ def _extract_mpd_content_protection_info(self, cp_e, cenc_info):
+ """
+ Extract supported DASH-CENC parameters from the MPD ContentProtection element `cp_e`.
+
+ Results are written into the `cenc_info` dict in place (keys `key_ids` and `laurl`); nothing is
+ returned. Called multiple times per extracted format in an MPD (once per ContentProtection
+ element within AdaptationSet and Representation elements). Subclasses may override this method
+ when necessary (such as when the Clear Key license server URL is provided separately from the
+ manifest or when an extractor needs to process the optional data section in W3C PSSH boxes).
+
+ Note that the `has_drm` flag will be set for any protected format that meets neither
+ of these conditions:
+
+ * Both `laurl` and `key_ids` are set (indicating the native DASH downloader should
+ use the specified Clear Key server URL to retrieve the CENC key for this format).
+ * `key` is set (indicating the native DASH downloader should use the specified
+ CENC key for this format).
+
+ References:
+ 1. DASH-IF Content Protection Identifiers
+ https://dashif.org/identifiers/content_protection/
+ 2. DASH-IF Content Protection Guidelines
+ https://dashif.org/docs/IOP-Guidelines/DASH-IF-IOP-Part6-v5.0.0.pdf
+ 3. W3C "cenc" Initialization Data Format
+ https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html
+ """
+ scheme_id = cp_e.get('schemeIdUri')
+ if scheme_id == 'urn:mpeg:dash:mp4protection:2011':
+ if cp_e.get('value') == 'cenc':
+ # ISO/IEC 23009-1 MPEG Common Encryption (CENC)
+ if not cenc_info.get('key_ids'):  # keep key IDs already collected (e.g. from a PSSH box)
+ try:
+ default_kid = uuid.UUID(cp_e.get('{urn:mpeg:cenc:2013}default_KID')).hex
+ cenc_info['key_ids'] = [default_kid]
+ except (ValueError, TypeError):  # default_KID attribute missing or not a valid UUID
+ pass
+ elif scheme_id == 'urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e':
+ # Clear Key DASH-IF
+ for tag, ns in itertools.product(  # try both tag spellings under both namespaces
+ ('Laurl', 'laurl'),
+ ('https://dashif.org/CPS', 'http://dashif.org/guidelines/clearKey'),
+ ):
+ url_e = cp_e.find(self._xpath_ns(tag, ns))
+ if url_e is not None:
+ cenc_info['laurl'] = url_e.text
+ break
+ elif scheme_id == 'urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b':
+ # W3C Common System ID
+ pssh_e = cp_e.find(self._xpath_ns('pssh', 'urn:mpeg:cenc:2013'))
+ if pssh_e is not None:
+ # W3C PSSH box (may carry Clear Key KIDs or KIDs for other DRM systems).
+ # NOTE(review): offsets 28/32 assume a version 1 box; the version byte is not checked - confirm
+ try:
+ pssh_box = base64.b64decode(pssh_e.text)
+ kid_count, = struct.unpack('!L', pssh_box[28:32])  # big-endian uint32 KID count
+ kids = []
+ for i in range(kid_count):
+ kid = pssh_box[32 + i * 16:32 + (i + 1) * 16]  # consecutive 16-byte key IDs
+ kids.append(kid.hex())
+ cenc_info['key_ids'] = kids  # replaces any key IDs collected earlier
+ except (ValueError, TypeError, struct.error):  # bad/missing base64 text or box too short
+ pass
+
def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
if subs: