mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 12:23:19 +00:00
Making the parse_model function, address comments
This commit is contained in:
parent
9dbd9fc873
commit
e2ae76e84c
|
@ -798,9 +798,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': 'Doctor Abu Sabha said he was detained by Israeli forces after the raid on Nasser Hospital and feared for his life.\n\nThe IDF said "during the activity, about 200 terrorists and suspects of terrorist activity were detained, including some who posed as medical teams, many weapons were found, as well as closed medicines intended for Israeli hostages."',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276,} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710270205000,
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
|
@ -1266,35 +1268,45 @@ def extract_all(pattern):
|
|||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id), (
|
||||
'props', 'pageProps', 'page'), get_all=False)
|
||||
video_data = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'video'), get_all=False)
|
||||
if video_data:
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'media',
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'mediaMetadata',
|
||||
'model'), get_all=False)
|
||||
if model:
|
||||
timestamp = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'timestamp',
|
||||
'model', 'timestamp', {int_or_none}, any))
|
||||
def parse_model(model):
|
||||
'''Extract single video from model structure'''
|
||||
if(type(model) == list):
|
||||
model = model[0]
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
entries.append({
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'timestamp': timestamp,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {url_or_none}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': (
|
||||
'synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}), (
|
||||
'props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'video',
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'media',
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'mediaMetadata',
|
||||
'model'))
|
||||
if model:
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
if entry.get('timestamp') is None:
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'timestamp',
|
||||
'model', 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
|
Loading…
Reference in a new issue