[ie/twitter:broadcast] Improve metadata extraction (#8383)

Authored by: HitomaruKonpaku
This commit is contained in:
HitomaruKonpaku 2023-11-11 08:34:22 +07:00 committed by GitHub
parent 10025b715e
commit 7d337ca977
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 5 deletions

View file

@ -24,12 +24,13 @@ def _parse_broadcast_data(self, broadcast, video_id):
thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
} for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]
return {
'id': broadcast.get('id') or video_id,
'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
broadcast.get('created_at_ms'), scale=1000),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,

View file

@ -1563,7 +1563,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
_TEST = {
_TESTS = [{
# untitled Periscope video
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
'info_dict': {
@ -1571,11 +1571,42 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast',
'uploader': 'Andrea May Sahouri',
'uploader_id': '1PXEdBZWpGwKe',
'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri',
'timestamp': 1590973638,
'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}
}, {
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
'info_dict': {
'id': '1ZkKzeyrPbaxv',
'ext': 'mp4',
'title': 'Starship | SN10 | High-Altitude Flight Test',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'timestamp': 1614812942,
'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}, {
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
'info_dict': {
'id': '1OyKAVQrgzwGb',
'ext': 'mp4',
'title': 'Starship Flight Test',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'timestamp': 1681993964,
'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
},
}]
def _real_extract(self, url):
broadcast_id = self._match_id(url)
@ -1585,6 +1616,9 @@ def _real_extract(self, url):
if not broadcast:
raise ExtractorError('Broadcast no longer exists', expected=True)
info = self._parse_broadcast_data(broadcast, broadcast_id)
info['title'] = broadcast.get('status') or info.get('title')
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
media_key = broadcast['media_key']
source = self._call_api(
f'live_video_stream/status/{media_key}', media_key)['source']