mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-08 09:01:10 +00:00
[youtube] Add YoutubeStoriesIE (#3362)
Get channel stories with `ytstories:<channel UCID>`

Authored-by: coletdjnz
This commit is contained in:
parent
3fe75fdc80
commit
6e634cbe42
|
@ -89,6 +89,7 @@ # NEW FEATURES
|
||||||
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
|
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
|
||||||
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
|
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
|
||||||
* Download livestreams from the start using `--live-from-start` (experimental)
|
* Download livestreams from the start using `--live-from-start` (experimental)
|
||||||
|
* Support for downloading stories (`ytstories:<channel UCID>`)
|
||||||
|
|
||||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
|
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
|
||||||
|
|
||||||
|
|
|
@ -2115,6 +2115,7 @@
|
||||||
YoutubeSearchURLIE,
|
YoutubeSearchURLIE,
|
||||||
YoutubeMusicSearchURLIE,
|
YoutubeMusicSearchURLIE,
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
|
YoutubeStoriesIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeYtBeIE,
|
YoutubeYtBeIE,
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import base64
|
||||||
import calendar
|
import calendar
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
|
@ -2199,7 +2200,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
|
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True}
|
'params': {'skip_download': True}
|
||||||
},
|
}, {
|
||||||
|
# Story. Requires specific player params to work.
|
||||||
|
# Note: stories get removed after some period of time
|
||||||
|
'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'yN3x1t3sieA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Linus Tech Tips',
|
||||||
|
'duration': 13,
|
||||||
|
'channel': 'Linus Tech Tips',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'tags': [],
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
|
||||||
|
'upload_date': '20220402',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
|
||||||
|
'title': 'Story',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'uploader_id': 'LinusTechTips',
|
||||||
|
'view_count': int,
|
||||||
|
'description': '',
|
||||||
|
'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
|
||||||
|
'categories': ['Science & Technology'],
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
|
||||||
|
'availability': 'unlisted',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -2831,12 +2858,17 @@ def extract_thread(contents):
|
||||||
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
|
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
|
||||||
|
|
||||||
continuation = self._extract_continuation(root_continuation_data)
|
continuation = self._extract_continuation(root_continuation_data)
|
||||||
message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
|
|
||||||
if message and not parent:
|
|
||||||
self.report_warning(message, video_id=video_id)
|
|
||||||
|
|
||||||
response = None
|
response = None
|
||||||
|
is_forced_continuation = False
|
||||||
is_first_continuation = parent is None
|
is_first_continuation = parent is None
|
||||||
|
if is_first_continuation and not continuation:
|
||||||
|
# Sometimes you can get comments by generating the continuation yourself,
|
||||||
|
# even if YouTube initially reports them being disabled - e.g. stories comments.
|
||||||
|
# Note: if the comment section is actually disabled, YouTube may return a response with
|
||||||
|
# required check_get_keys missing. So we will disable that check initially in this case.
|
||||||
|
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
||||||
|
is_forced_continuation = True
|
||||||
|
|
||||||
for page_num in itertools.count(0):
|
for page_num in itertools.count(0):
|
||||||
if not continuation:
|
if not continuation:
|
||||||
|
@ -2857,8 +2889,8 @@ def extract_thread(contents):
|
||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=None, query=continuation,
|
item_id=None, query=continuation,
|
||||||
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
||||||
check_get_keys='onResponseReceivedEndpoints')
|
check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
|
||||||
|
is_forced_continuation = False
|
||||||
continuation_contents = traverse_obj(
|
continuation_contents = traverse_obj(
|
||||||
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
|
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
|
||||||
|
|
||||||
|
@ -2883,6 +2915,18 @@ def extract_thread(contents):
|
||||||
if continuation:
|
if continuation:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
|
||||||
|
if message and not parent and tracker['running_total'] == 0:
|
||||||
|
self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_comment_continuation(video_id):
|
||||||
|
"""
|
||||||
|
Generates initial comment section continuation token from given video id
|
||||||
|
"""
|
||||||
|
token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
|
||||||
|
return base64.b64encode(token.encode()).decode()
|
||||||
|
|
||||||
def _get_comments(self, ytcfg, video_id, contents, webpage):
|
def _get_comments(self, ytcfg, video_id, contents, webpage):
|
||||||
"""Entry for comment extraction"""
|
"""Entry for comment extraction"""
|
||||||
def _real_comment_extract(contents):
|
def _real_comment_extract(contents):
|
||||||
|
@ -2936,7 +2980,10 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
||||||
headers = self.generate_api_headers(
|
headers = self.generate_api_headers(
|
||||||
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
|
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
|
||||||
|
|
||||||
yt_query = {'videoId': video_id}
|
yt_query = {
|
||||||
|
'videoId': video_id,
|
||||||
|
'params': '8AEB' # enable stories
|
||||||
|
}
|
||||||
yt_query.update(self._generate_player_context(sts))
|
yt_query.update(self._generate_player_context(sts))
|
||||||
return self._extract_response(
|
return self._extract_response(
|
||||||
item_id=video_id, ep='player', query=yt_query,
|
item_id=video_id, ep='player', query=yt_query,
|
||||||
|
@ -3251,7 +3298,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
|
||||||
webpage = None
|
webpage = None
|
||||||
if 'webpage' not in self._configuration_arg('player_skip'):
|
if 'webpage' not in self._configuration_arg('player_skip'):
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
|
webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)
|
||||||
|
|
||||||
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
|
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
|
||||||
|
|
||||||
|
@ -3696,7 +3743,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
||||||
unified_strdate(get_first(microformats, 'uploadDate'))
|
unified_strdate(get_first(microformats, 'uploadDate'))
|
||||||
or unified_strdate(search_meta('uploadDate')))
|
or unified_strdate(search_meta('uploadDate')))
|
||||||
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
|
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
|
||||||
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
|
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
|
||||||
info['upload_date'] = upload_date
|
info['upload_date'] = upload_date
|
||||||
|
|
||||||
for to, frm in fallbacks.items():
|
for to, frm in fallbacks.items():
|
||||||
|
@ -4211,7 +4258,7 @@ def _get_uncropped(url):
|
||||||
self._extract_visitor_data(data, ytcfg)),
|
self._extract_visitor_data(data, ytcfg)),
|
||||||
**metadata)
|
**metadata)
|
||||||
|
|
||||||
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
|
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
|
||||||
first_id = last_id = response = None
|
first_id = last_id = response = None
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
videos = list(self._playlist_entries(playlist))
|
videos = list(self._playlist_entries(playlist))
|
||||||
|
@ -4221,9 +4268,6 @@ def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
|
||||||
if start >= len(videos):
|
if start >= len(videos):
|
||||||
return
|
return
|
||||||
for video in videos[start:]:
|
for video in videos[start:]:
|
||||||
if video['id'] == first_id:
|
|
||||||
self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
|
|
||||||
return
|
|
||||||
yield video
|
yield video
|
||||||
first_id = first_id or videos[0]['id']
|
first_id = first_id or videos[0]['id']
|
||||||
last_id = videos[-1]['id']
|
last_id = videos[-1]['id']
|
||||||
|
@ -4255,13 +4299,18 @@ def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
|
||||||
playlist_url = urljoin(url, try_get(
|
playlist_url = urljoin(url, try_get(
|
||||||
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
||||||
compat_str))
|
compat_str))
|
||||||
if playlist_url and playlist_url != url:
|
|
||||||
|
# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
|
||||||
|
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
|
||||||
|
is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
|
||||||
|
|
||||||
|
if playlist_url and playlist_url != url and not is_known_unviewable:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||||
video_title=title)
|
video_title=title)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
|
self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
|
||||||
playlist_id=playlist_id, playlist_title=title)
|
playlist_id=playlist_id, playlist_title=title)
|
||||||
|
|
||||||
def _extract_availability(self, data):
|
def _extract_availability(self, data):
|
||||||
|
@ -5798,6 +5847,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeStoriesIE(InfoExtractor):
    # Handles the "ytstories:<channel UCID>" pseudo-URL: the part of the
    # channel id after "UC" is turned into an "RLTD…" playlist id and the
    # resulting playlist URL is handed over to YoutubeTabIE.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the stories playlist for the matched channel."""
        # _VALID_URL captures the channel id without its "UC" prefix.
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeTruncatedURLIE(InfoExtractor):
|
class YoutubeTruncatedURLIE(InfoExtractor):
|
||||||
IE_NAME = 'youtube:truncated_url'
|
IE_NAME = 'youtube:truncated_url'
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
|
|
Loading…
Reference in a new issue