[tiktok] Detect embeds

Closes #3799
This commit is contained in:
pukkandan 2022-05-20 06:01:08 +05:30
parent 0b9c08b47b
commit b801cd7179
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 24 additions and 10 deletions

View file

@ -74,6 +74,7 @@
from .ted import TedEmbedIE from .ted import TedEmbedIE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tunein import TuneInBaseIE from .tunein import TuneInBaseIE
@ -3756,6 +3757,11 @@ def _real_extract(self, url):
if ruutu_urls: if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title) return self.playlist_from_matches(ruutu_urls, video_id, video_title)
# Look for TikTok embeds
tiktok_urls = TikTokIE._extract_urls(webpage)
if tiktok_urls:
return self.playlist_from_matches(tiktok_urls, video_id, video_title)
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View file

@ -1,28 +1,26 @@
import itertools import itertools
import json
import random import random
import re
import string import string
import time import time
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest, HEADRequest,
LazyList,
UnsupportedError, UnsupportedError,
get_first, get_first,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
LazyList, qualities,
srt_subtitles_timecode, srt_subtitles_timecode,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
url_or_none, url_or_none,
qualities,
) )
@ -36,6 +34,10 @@ class TikTokBaseIE(InfoExtractor):
_WEBPAGE_HOST = 'https://www.tiktok.com/' _WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p') QUALITIES = ('360p', '540p', '720p', '1080p')
@staticmethod
def _create_url(user_id, video_id):
    """Build the TikTok webpage URL for a video.

    A falsy ``user_id`` (``None``, empty string) is replaced by the
    placeholder ``_`` in the ``@<user>`` path segment.
    """
    handle = user_id or "_"
    return f'https://www.tiktok.com/@{handle}/video/{video_id}'
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@ -361,7 +363,7 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url):
class TikTokIE(TikTokBaseIE): class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)' _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@ -466,7 +468,7 @@ class TikTokIE(TikTokBaseIE):
'info_dict': { 'info_dict': {
'id': '7059698374567611694', 'id': '7059698374567611694',
'ext': 'mp4', 'ext': 'mp4',
'title': 'tiktok video #7059698374567611694', 'title': 'TikTok video #7059698374567611694',
'description': '', 'description': '',
'uploader': 'pokemonlife22', 'uploader': 'pokemonlife22',
'creator': 'Pokemon', 'creator': 'Pokemon',
@ -490,6 +492,11 @@ class TikTokIE(TikTokBaseIE):
'only_matching': True 'only_matching': True
}] }]
@classmethod
def _extract_urls(cls, webpage):
    """Collect embedded URLs of this extractor from an HTML page.

    Scans ``webpage`` for ``<script>`` or ``<iframe>`` tags whose quoted
    ``src`` attribute starts with a URL matching ``cls._VALID_URL`` and
    returns the matched URLs as a list, in order of appearance.
    """
    embed_re = rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})'
    found = []
    for match in re.finditer(embed_re, webpage):
        found.append(match.group('url'))
    return found
def _extract_aweme_app(self, aweme_id): def _extract_aweme_app(self, aweme_id):
try: try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@ -506,7 +513,8 @@ def _extract_aweme_app(self, aweme_id):
return self._parse_aweme_video_app(aweme_detail) return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
url = self._create_url(user_id, video_id)
try: try:
return self._extract_aweme_app(video_id) return self._extract_aweme_app(video_id)