mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
[YoutubeDL] Fill more fields for playlists (#2824)
This commit is contained in:
parent
09b49e1f68
commit
03f830040a
|
@ -1584,6 +1584,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
|
|||
|
||||
self._playlist_level += 1
|
||||
self._playlist_urls.add(webpage_url)
|
||||
self._fill_common_fields(ie_result, False)
|
||||
self._sanitize_thumbnails(ie_result)
|
||||
try:
|
||||
return self.__process_playlist(ie_result, download)
|
||||
|
@ -2308,6 +2309,58 @@ def check_thumbnails(thumbnails):
|
|||
else:
|
||||
info_dict['thumbnails'] = thumbnails
|
||||
|
||||
def _fill_common_fields(self, info_dict, is_video=True):
    """Fill in fields of an extractor result that can be derived from other fields.

    The dict is modified in place; nothing is returned. The following are
    derived when their source field is present:
      * "fulltitle" and a generic fallback "title" (videos only)
      * "duration_string" from "duration"
      * "upload_date"/"release_date"/"modified_date" from the matching
        "*timestamp" field, unless the date is already set
      * "live_status", "is_live" and "was_live" from one another
      * "chapter"/"season"/"episode" from the matching "*_number" field

    @param info_dict    The result dict to complete
    @param is_video     Whether the result is a video. When false (playlists),
                        the "title" handling is skipped entirely
    Raises ExtractorError if is_video is true and the "title" key is absent.
    """
    # TODO: move sanitization here
    if is_video:
        # playlists are allowed to lack "title"
        # NOTE: "fulltitle" is recorded before validation, so it keeps the
        # original (possibly empty) title even when a generic one is generated
        info_dict['fulltitle'] = info_dict.get('title')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])
        elif not info_dict.get('title'):
            self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                # Timezone-aware conversion; datetime.utcfromtimestamp() is
                # deprecated since Python 3.12. The resulting UTC date is the same
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                # Best effort: leave the date field unset if it cannot be computed
                pass

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        for key in live_keys:
            # Keys that are explicitly False are skipped; the loop stops at the
            # first key that is anything else, and only a truthy value there
            # decides the status (an unknown value leaves it undetermined)
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        # Back-fill only the boolean flags that the extractor left unset
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
self._num_videos += 1
|
||||
|
@ -2317,14 +2370,6 @@ def process_video_result(self, info_dict, download=True):
|
|||
elif not info_dict.get('id'):
|
||||
raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
|
||||
|
||||
info_dict['fulltitle'] = info_dict.get('title')
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result',
|
||||
video_id=info_dict['id'], ie=info_dict['extractor'])
|
||||
elif not info_dict.get('title'):
|
||||
self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
|
||||
info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
|
||||
|
||||
def report_force_conversion(field, field_not, conversion):
|
||||
self.report_warning(
|
||||
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||
|
@ -2365,45 +2410,7 @@ def sanitize_numeric_fields(info):
|
|||
if info_dict.get('display_id') is None and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('duration') is not None:
|
||||
info_dict['duration_string'] = formatSeconds(info_dict['duration'])
|
||||
|
||||
for ts_key, date_key in (
|
||||
('timestamp', 'upload_date'),
|
||||
('release_timestamp', 'release_date'),
|
||||
('modified_timestamp', 'modified_date'),
|
||||
):
|
||||
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
live_keys = ('is_live', 'was_live')
|
||||
live_status = info_dict.get('live_status')
|
||||
if live_status is None:
|
||||
for key in live_keys:
|
||||
if info_dict.get(key) is False:
|
||||
continue
|
||||
if info_dict.get(key):
|
||||
live_status = key
|
||||
break
|
||||
if all(info_dict.get(key) is False for key in live_keys):
|
||||
live_status = 'not_live'
|
||||
if live_status:
|
||||
info_dict['live_status'] = live_status
|
||||
for key in live_keys:
|
||||
if info_dict.get(key) is None:
|
||||
info_dict[key] = (live_status == key)
|
||||
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
for field in ('chapter', 'season', 'episode'):
|
||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
self._fill_common_fields(info_dict)
|
||||
|
||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||
cc = info_dict.get(cc_kind)
|
||||
|
|
Loading…
Reference in a new issue