mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-22 08:46:43 +00:00
[ie/gem.cbc.ca] Fix formats extraction (#11196)
Also extracts `timestamp` and `release_timestamp` as seconds instead of milliseconds.
Authored by: DavidSkrundz
This commit is contained in:
parent
fed53d70bd
commit
40054cb4a7
|
@ -4,7 +4,6 @@
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
|
@ -12,7 +11,6 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# This is a normal, public, TV show video
|
# This is a normal, public, TV show video
|
||||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'schitts-creek/s06e01',
|
'id': 'schitts-creek/s06e01',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Smoke Signals',
|
'title': 'Smoke Signals',
|
||||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||||
'duration': 1314,
|
'duration': 1324,
|
||||||
'categories': ['comedy'],
|
'categories': ['comedy'],
|
||||||
'series': 'Schitt\'s Creek',
|
'series': 'Schitt\'s Creek',
|
||||||
'season': 'Season 6',
|
'season': 'Season 6',
|
||||||
|
@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||||
'episode': 'Smoke Signals',
|
'episode': 'Smoke Signals',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'episode_id': 'schitts-creek/s06e01',
|
'episode_id': 'schitts-creek/s06e01',
|
||||||
|
'upload_date': '20210618',
|
||||||
|
'timestamp': 1623988800,
|
||||||
|
'release_date': '20200107',
|
||||||
|
'release_timestamp': 1578427200,
|
||||||
},
|
},
|
||||||
'params': {'format': 'bv'},
|
'params': {'format': 'bv'},
|
||||||
'skip': 'Geo-restricted to Canada',
|
|
||||||
}, {
|
}, {
|
||||||
# This video requires an account in the browser, but works fine in yt-dlp
|
# This video requires an account in the browser, but works fine in yt-dlp
|
||||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||||
'md5': '297a9600f554f2258aed01514226a697',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'schitts-creek/s01e01',
|
'id': 'schitts-creek/s01e01',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Cup Runneth Over',
|
'title': 'The Cup Runneth Over',
|
||||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||||
'series': 'Schitt\'s Creek',
|
'series': 'Schitt\'s Creek',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'season': 'Season 1',
|
'season': 'Season 1',
|
||||||
|
@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||||
'episode_id': 'schitts-creek/s01e01',
|
'episode_id': 'schitts-creek/s01e01',
|
||||||
'duration': 1309,
|
'duration': 1309,
|
||||||
'categories': ['comedy'],
|
'categories': ['comedy'],
|
||||||
|
'upload_date': '20210617',
|
||||||
|
'timestamp': 1623902400,
|
||||||
|
'release_date': '20151124',
|
||||||
|
'release_timestamp': 1448323200,
|
||||||
},
|
},
|
||||||
'params': {'format': 'bv'},
|
'params': {'format': 'bv'},
|
||||||
'skip': 'Geo-restricted to Canada',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -631,38 +633,6 @@ def _real_initialize(self):
|
||||||
return
|
return
|
||||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||||
|
|
||||||
def _find_secret_formats(self, formats, video_id):
|
|
||||||
""" Find a valid video url and convert it to the secret variant """
|
|
||||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
|
||||||
if not base_format:
|
|
||||||
return
|
|
||||||
|
|
||||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
|
||||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
|
||||||
|
|
||||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
|
||||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
|
||||||
return
|
|
||||||
|
|
||||||
for child in secret_xml:
|
|
||||||
if child.attrib.get('Type') != 'video':
|
|
||||||
continue
|
|
||||||
for video_quality in child:
|
|
||||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
|
||||||
if not bitrate or 'Index' not in video_quality.attrib:
|
|
||||||
continue
|
|
||||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
|
||||||
|
|
||||||
yield {
|
|
||||||
**base_format,
|
|
||||||
'format_id': join_nonempty('sec', height),
|
|
||||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
|
||||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
|
||||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
|
||||||
'tbr': bitrate / 1000.0,
|
|
||||||
'height': height,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_info = self._download_json(
|
video_info = self._download_json(
|
||||||
|
@ -676,7 +646,6 @@ def _real_extract(self, url):
|
||||||
else:
|
else:
|
||||||
headers = {}
|
headers = {}
|
||||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||||
m3u8_url = m3u8_info.get('url')
|
|
||||||
|
|
||||||
if m3u8_info.get('errorCode') == 1:
|
if m3u8_info.get('errorCode') == 1:
|
||||||
self.raise_geo_restricted(countries=['CA'])
|
self.raise_geo_restricted(countries=['CA'])
|
||||||
|
@ -685,9 +654,9 @@ def _real_extract(self, url):
|
||||||
elif m3u8_info.get('errorCode') != 0:
|
elif m3u8_info.get('errorCode') != 0:
|
||||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
formats.extend(self._find_secret_formats(formats, video_id))
|
|
||||||
|
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
if fmt.get('vcodec') == 'none':
|
if fmt.get('vcodec') == 'none':
|
||||||
|
@ -703,20 +672,21 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
|
||||||
'description': video_info.get('description'),
|
|
||||||
'thumbnail': video_info.get('image'),
|
|
||||||
'series': video_info.get('series'),
|
|
||||||
'season_number': video_info.get('season'),
|
|
||||||
'season': f'Season {video_info.get("season")}',
|
|
||||||
'episode_number': video_info.get('episode'),
|
|
||||||
'episode': video_info.get('title'),
|
|
||||||
'episode_id': video_id,
|
'episode_id': video_id,
|
||||||
'duration': video_info.get('duration'),
|
|
||||||
'categories': [video_info.get('category')],
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'release_timestamp': video_info.get('airDate'),
|
**traverse_obj(video_info, {
|
||||||
'timestamp': video_info.get('availableDate'),
|
'title': ('title', {str}),
|
||||||
|
'episode': ('title', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('image', {url_or_none}),
|
||||||
|
'series': ('series', {str}),
|
||||||
|
'season_number': ('season', {int_or_none}),
|
||||||
|
'episode_number': ('episode', {int_or_none}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'categories': ('category', {str}, all),
|
||||||
|
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||||
|
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue