[ie/youporn] Fix extractor (#8827)

Closes #7967
Authored by: The-MAGI
This commit is contained in:
The-MAGI 2024-05-06 01:57:38 +03:00 committed by GitHub
parent 96da952504
commit 351368cb9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
'id': '16290308', 'id': '16290308',
'age_limit': 18, 'age_limit': 18,
'categories': [], 'categories': [],
'description': 'md5:00ea70f642f431c379763c17c2f396bc', 'description': str, # TODO: detect/remove SEO spam description in ytdl backport
'display_id': 'tinderspecial-trailer1', 'display_id': 'tinderspecial-trailer1',
'duration': 298.0, 'duration': 298.0,
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20201123', 'upload_date': '20201123',
'uploader': 'Ersties', 'uploader': 'Ersties',
'tags': [], 'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg', 'thumbnail': r're:https://.+\.jpg',
'timestamp': 1606089600, 'timestamp': 1606147564,
'title': 'Tinder In Real Life', 'title': 'Tinder In Real Life',
'view_count': int, 'view_count': int,
} }
@@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id') video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
definitions = self._download_json( self._set_cookie('.youporn.com', 'age_verified', '1')
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id) webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
def get_format_data(data, f): def get_format_data(data, stream_type):
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl'])) info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
if not info_url:
return []
return traverse_obj(
self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = [] formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +129,6 @@ def get_format_data(data, f):
f['height'] = height f['height'] = height
formats.append(f) formats.append(f)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title( webpage, 'title', default=None) or self._og_search_title(