mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-09-28 13:47:53 +00:00
[ie/youtube] Extract upload date timestamp if available
This commit is contained in:
parent
ac817bc83e
commit
17bcceda78
|
@ -5,6 +5,7 @@
|
|||
import sys
|
||||
import unittest
|
||||
import warnings
|
||||
import datetime as dt
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
@ -768,6 +769,8 @@ def test_encode_compat_str(self):
|
|||
|
||||
def test_parse_iso8601(self):
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
|
||||
|
|
|
@ -4552,19 +4552,43 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
|||
'uploader_id': channel_handle,
|
||||
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
|
||||
})
|
||||
|
||||
def get_pacific_tz():
    """Return the current UTC offset of US Pacific time as a ``timedelta``.

    A fixed UTC-7 offset is returned instead when the offset cannot be
    looked up: on Python < 3.9 (no ``zoneinfo`` module) or when the time
    zone database has no entry for the zone. During standard time (UTC-8)
    that fallback is one hour off.
    """
    fallback_offset = dt.timedelta(hours=-7)

    # Python 3.8 should be deprecated soon
    # This fallback may make the timestamp slightly inaccurate for 3.8 users.
    if sys.version_info < (3, 9):
        return fallback_offset

    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    try:
        # Use the canonical key: 'US/Pacific' is only a legacy alias of this
        # zone, and not every tz database install provides the legacy names.
        pacific = ZoneInfo('America/Los_Angeles')
    except ZoneInfoNotFoundError:
        # No usable tz data (e.g. no system database and no `tzdata` package);
        # degrade to the fixed offset rather than failing the whole extraction.
        return fallback_offset

    # NOTE: this is the offset in effect *now*, not at the date being parsed,
    # so a date on the other side of a DST change is shifted by one hour.
    return dt.datetime.now(pacific).utcoffset()
|
||||
|
||||
# We only want timestamp IF it has second precision
|
||||
# Additionally, if there is no timezone present, we should assume it is in PT.
|
||||
timestamp = (
|
||||
parse_iso8601(get_first(microformats, 'uploadDate'), timezone=get_pacific_tz())
|
||||
or parse_iso8601(search_meta('uploadDate'), timezone=get_pacific_tz())
|
||||
)
|
||||
upload_date = (
|
||||
dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
|
||||
(
|
||||
unified_strdate(get_first(microformats, 'uploadDate'))
|
||||
or unified_strdate(search_meta('uploadDate'))
|
||||
))
|
||||
|
||||
# In the case we cannot get the timestamp:
|
||||
# The upload date for scheduled, live and past live streams / premieres in microformats
|
||||
# may be different from the stream date. Although not in UTC, we will prefer it in this case.
|
||||
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
|
||||
upload_date = (
|
||||
unified_strdate(get_first(microformats, 'uploadDate'))
|
||||
or unified_strdate(search_meta('uploadDate')))
|
||||
if not upload_date or (
|
||||
live_status in ('not_live', None)
|
||||
not timestamp
|
||||
and live_status in ('not_live', None)
|
||||
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
|
||||
):
|
||||
# this should be in UTC, as configured in the cookie/client context
|
||||
upload_date = strftime_or_none(
|
||||
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
|
||||
|
||||
info['upload_date'] = upload_date
|
||||
info['timestamp'] = timestamp
|
||||
|
||||
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
|
||||
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
|
||||
|
|
|
@ -1134,7 +1134,7 @@ def is_path_like(f):
|
|||
return isinstance(f, (str, bytes, os.PathLike))
|
||||
|
||||
|
||||
def extract_timezone(date_str):
|
||||
def extract_timezone(date_str, default=None):
|
||||
m = re.search(
|
||||
r'''(?x)
|
||||
^.{8,}? # >=8 char non-TZ prefix, if present
|
||||
|
@ -1146,22 +1146,23 @@ def extract_timezone(date_str):
|
|||
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
|
||||
$)
|
||||
''', date_str)
|
||||
timezone = None
|
||||
|
||||
if not m:
|
||||
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
|
||||
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
|
||||
if timezone is not None:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
timezone = dt.timedelta(hours=timezone or 0)
|
||||
timezone = dt.timedelta(hours=timezone)
|
||||
else:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
if not m.group('sign'):
|
||||
timezone = dt.timedelta()
|
||||
else:
|
||||
if m.group('sign'):
|
||||
sign = 1 if m.group('sign') == '+' else -1
|
||||
timezone = dt.timedelta(
|
||||
hours=sign * int(m.group('hours')),
|
||||
minutes=sign * int(m.group('minutes')))
|
||||
return timezone, date_str
|
||||
|
||||
return timezone or default or dt.timedelta(), date_str
|
||||
|
||||
|
||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||
|
@ -1172,8 +1173,7 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
|
|||
|
||||
date_str = re.sub(r'\.[0-9]+', '', date_str)
|
||||
|
||||
if timezone is None:
|
||||
timezone, date_str = extract_timezone(date_str)
|
||||
timezone, date_str = extract_timezone(date_str, timezone)
|
||||
|
||||
with contextlib.suppress(ValueError):
|
||||
date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
|
||||
|
|
Loading…
Reference in a new issue