mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-16 13:23:20 +00:00
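[extractor] Make `_search_json` able to parse lists
`contains_pattern` now has to match the whole JSON value, including its delimiters (the default is `{(?s:.+)}`), so it can be set to e.g. `\[.+\]` to extract a list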
This commit is contained in:
parent
a83333c432
commit
8b7fb8b60d
|
@ -1227,7 +1227,7 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
|
def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
|
||||||
contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
|
contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
|
||||||
"""Searches string for the JSON object specified by start_pattern"""
|
"""Searches string for the JSON object specified by start_pattern"""
|
||||||
# NB: end_pattern is only used to reduce the size of the initial match
|
# NB: end_pattern is only used to reduce the size of the initial match
|
||||||
if default is NO_DEFAULT:
|
if default is NO_DEFAULT:
|
||||||
|
@ -1236,7 +1236,7 @@ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
|
||||||
fatal, has_default = False, True
|
fatal, has_default = False, True
|
||||||
|
|
||||||
json_string = self._search_regex(
|
json_string = self._search_regex(
|
||||||
rf'(?:{start_pattern})\s*(?P<json>{{\s*(?:{contains_pattern})\s*}})\s*(?:{end_pattern})',
|
rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
|
||||||
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
|
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
|
||||||
if not json_string:
|
if not json_string:
|
||||||
return default
|
return default
|
||||||
|
|
|
@ -54,7 +54,7 @@ def _real_extract(self, url):
|
||||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||||
|
|
||||||
info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
|
info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
|
||||||
contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
|
contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
|
||||||
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
|
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
# _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
|
# _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
|
||||||
video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
|
video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
Loading…
Reference in a new issue