Mirror of https://github.com/yt-dlp/yt-dlp.git (last synced 2024-11-27 10:31:29 +00:00)
[ie/Douyin] Fix extractor (#9239)
Closes #7854, Closes #7941
Authored by: 114514ns, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
e28e135d6f
commit
9ff9466455
|
@ -6,7 +6,7 @@
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -15,7 +15,6 @@
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
format_field,
|
format_field,
|
||||||
get_first,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
@ -219,8 +218,8 @@ def audio_meta(url):
|
||||||
def extract_addr(addr, add_meta={}):
|
def extract_addr(addr, add_meta={}):
|
||||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||||
if res:
|
if res:
|
||||||
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
|
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||||
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
|
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
||||||
parsed_meta.update(known_resolutions.get(res, {}))
|
parsed_meta.update(known_resolutions.get(res, {}))
|
||||||
add_meta.setdefault('height', int_or_none(res[:-1]))
|
add_meta.setdefault('height', int_or_none(res[:-1]))
|
||||||
return [{
|
return [{
|
||||||
|
@ -237,22 +236,26 @@ def extract_addr(addr, add_meta={}):
|
||||||
|
|
||||||
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
||||||
formats = []
|
formats = []
|
||||||
|
width = int_or_none(video_info.get('width'))
|
||||||
|
height = int_or_none(video_info.get('height'))
|
||||||
if video_info.get('play_addr'):
|
if video_info.get('play_addr'):
|
||||||
formats.extend(extract_addr(video_info['play_addr'], {
|
formats.extend(extract_addr(video_info['play_addr'], {
|
||||||
'format_id': 'play_addr',
|
'format_id': 'play_addr',
|
||||||
'format_note': 'Direct video',
|
'format_note': 'Direct video',
|
||||||
'vcodec': 'h265' if traverse_obj(
|
'vcodec': 'h265' if traverse_obj(
|
||||||
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
|
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
|
||||||
'width': video_info.get('width'),
|
'width': width,
|
||||||
'height': video_info.get('height'),
|
'height': height,
|
||||||
}))
|
}))
|
||||||
if video_info.get('download_addr'):
|
if video_info.get('download_addr'):
|
||||||
formats.extend(extract_addr(video_info['download_addr'], {
|
download_addr = video_info['download_addr']
|
||||||
|
dl_width = int_or_none(download_addr.get('width'))
|
||||||
|
formats.extend(extract_addr(download_addr, {
|
||||||
'format_id': 'download_addr',
|
'format_id': 'download_addr',
|
||||||
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
||||||
'vcodec': 'h264',
|
'vcodec': 'h264',
|
||||||
'width': video_info.get('width'),
|
'width': dl_width or width,
|
||||||
'height': video_info.get('height'),
|
'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
|
||||||
'preference': -2 if video_info.get('has_watermark') else -1,
|
'preference': -2 if video_info.get('has_watermark') else -1,
|
||||||
}))
|
}))
|
||||||
if video_info.get('play_addr_h264'):
|
if video_info.get('play_addr_h264'):
|
||||||
|
@ -921,20 +924,23 @@ class DouyinIE(TikTokBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.douyin.com/video/6961737553342991651',
|
'url': 'https://www.douyin.com/video/6961737553342991651',
|
||||||
'md5': 'a97db7e3e67eb57bf40735c022ffa228',
|
'md5': '9ecce7bc5b302601018ecb2871c63a75',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6961737553342991651',
|
'id': '6961737553342991651',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#杨超越 小小水手带你去远航❤️',
|
'title': '#杨超越 小小水手带你去远航❤️',
|
||||||
'description': '#杨超越 小小水手带你去远航❤️',
|
'description': '#杨超越 小小水手带你去远航❤️',
|
||||||
|
'uploader': '6897520xka',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 19782,
|
'creators': ['杨超越'],
|
||||||
|
'duration': 19,
|
||||||
'timestamp': 1620905839,
|
'timestamp': 1620905839,
|
||||||
'upload_date': '20210513',
|
'upload_date': '20210513',
|
||||||
'track': '@杨超越创作的原声',
|
'track': '@杨超越创作的原声',
|
||||||
|
'artists': ['杨超越'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
@ -943,20 +949,23 @@ class DouyinIE(TikTokBaseIE):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||||
'md5': '34a87ebff3833357733da3fe17e37c0e',
|
'md5': '15c5e660b7048af3707304e3cc02bbb5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6982497745948921092',
|
'id': '6982497745948921092',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||||
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||||
|
'uploader': '0731chaoyue',
|
||||||
'uploader_id': '408654318141572',
|
'uploader_id': '408654318141572',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||||
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||||
'creator': '杨超越工作室',
|
'creator': '杨超越工作室',
|
||||||
'duration': 42479,
|
'creators': ['杨超越工作室'],
|
||||||
|
'duration': 42,
|
||||||
'timestamp': 1625739481,
|
'timestamp': 1625739481,
|
||||||
'upload_date': '20210708',
|
'upload_date': '20210708',
|
||||||
'track': '@杨超越工作室创作的原声',
|
'track': '@杨超越工作室创作的原声',
|
||||||
|
'artists': ['杨超越工作室'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
@ -965,20 +974,23 @@ class DouyinIE(TikTokBaseIE):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||||
'md5': 'dde3302460f19db59c47060ff013b902',
|
'md5': '0e6443758b8355db9a3c34864a4276be',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6953975910773099811',
|
'id': '6953975910773099811',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#一起看海 出现在你的夏日里',
|
'title': '#一起看海 出现在你的夏日里',
|
||||||
'description': '#一起看海 出现在你的夏日里',
|
'description': '#一起看海 出现在你的夏日里',
|
||||||
|
'uploader': '6897520xka',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 17343,
|
'creators': ['杨超越'],
|
||||||
|
'duration': 17,
|
||||||
'timestamp': 1619098692,
|
'timestamp': 1619098692,
|
||||||
'upload_date': '20210422',
|
'upload_date': '20210422',
|
||||||
'track': '@杨超越创作的原声',
|
'track': '@杨超越创作的原声',
|
||||||
|
'artists': ['杨超越'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
@ -1004,20 +1016,23 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'skip': 'No longer available',
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6963263655114722595',
|
'url': 'https://www.douyin.com/video/6963263655114722595',
|
||||||
'md5': 'cf9f11f0ec45d131445ec2f06766e122',
|
'md5': '1440bcf59d8700f8e014da073a4dfea8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6963263655114722595',
|
'id': '6963263655114722595',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||||
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||||
|
'uploader': '6897520xka',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 15115,
|
'creators': ['杨超越'],
|
||||||
|
'duration': 15,
|
||||||
'timestamp': 1621261163,
|
'timestamp': 1621261163,
|
||||||
'upload_date': '20210517',
|
'upload_date': '20210517',
|
||||||
'track': '@杨超越创作的原声',
|
'track': '@杨超越创作的原声',
|
||||||
|
'artists': ['杨超越'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
@ -1025,34 +1040,23 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'thumbnail': r're:https?://.+\.jpe?g',
|
'thumbnail': r're:https?://.+\.jpe?g',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
_APP_VERSIONS = [('23.3.0', '230300')]
|
|
||||||
_APP_NAME = 'aweme'
|
|
||||||
_AID = 1128
|
|
||||||
_API_HOSTNAME = 'aweme.snssdk.com'
|
|
||||||
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
|
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
|
||||||
_WEBPAGE_HOST = 'https://www.douyin.com/'
|
_WEBPAGE_HOST = 'https://www.douyin.com/'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
try:
|
detail = traverse_obj(self._download_json(
|
||||||
return self._extract_aweme_app(video_id)
|
'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
|
||||||
except ExtractorError as e:
|
'Downloading web detail JSON', 'Failed to download web detail JSON',
|
||||||
e.expected = True
|
query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
|
||||||
self.to_screen(f'{e}; trying with webpage')
|
if not detail:
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
render_data = self._search_json(
|
|
||||||
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
|
|
||||||
contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
|
|
||||||
if not render_data:
|
|
||||||
# TODO: Run verification challenge code to generate signature cookies
|
# TODO: Run verification challenge code to generate signature cookies
|
||||||
cookies = self._get_cookies(self._WEBPAGE_HOST)
|
|
||||||
expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
|
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Fresh cookies (not necessarily logged in) are needed', expected=expected)
|
'Fresh cookies (not necessarily logged in) are needed',
|
||||||
|
expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
|
||||||
|
|
||||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
|
return self._parse_aweme_video_app(detail)
|
||||||
|
|
||||||
|
|
||||||
class TikTokVMIE(InfoExtractor):
|
class TikTokVMIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in a new issue