From bdceb022d0a69511dd4ea5a88eeb92e1e5e89c1c Mon Sep 17 00:00:00 2001 From: scrat5h <118751590+scrat5h@users.noreply.github.com> Date: Mon, 21 Nov 2022 23:04:29 +0100 Subject: [PATCH 1/5] [extractor/pornhub] uploader in test renamed from Babes to BABES-COM --- yt_dlp/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 5d8d7c100..bc8bf0799 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -136,7 +136,7 @@ class PornHubIE(PornHubBaseIE): 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', - 'uploader': 'Babes', + 'uploader': 'BABES-COM', 'upload_date': '20130628', 'timestamp': 1372447216, 'duration': 361, From 1138e33ac5547050474dc081626ba48157eb7c8e Mon Sep 17 00:00:00 2001 From: scrat5h <118751590+scrat5h@users.noreply.github.com> Date: Mon, 21 Nov 2022 23:06:09 +0100 Subject: [PATCH 2/5] [extractor/pornhub] Add data for tests --- yt_dlp/extractor/pornhub.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index bc8bf0799..e228ec6b1 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -131,12 +131,14 @@ class PornHubIE(PornHubBaseIE): _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': 'a6391306d050e4547f62b3f485dd9ba9', + 'md5': '4d4a4e9178b655776f86cf89ecaf0edf', 'info_dict': { 'id': '648719015', 'ext': 'mp4', + 'thumbnail': r're:^https://.i\.phncdn\.com/videos/201306/28/14084201/original/.*\.jpg', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'BABES-COM', + 'uploader_id': '/users/babes-com', 'upload_date': '20130628', 'timestamp': 1372447216, 'duration': 361, @@ -201,11 +203,22 @@ 
class PornHubIE(PornHubBaseIE): 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a', 'info_dict': { 'id': 'ph601dc30bae19a', + 'ext': 'mp4', 'uploader': 'Projekt Melody', 'uploader_id': 'projekt-melody', 'upload_date': '20210205', 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)', 'thumbnail': r're:https?://.+', + 'age_limit': 18, + 'view_count': int, + 'cast': [], + 'like_count': int, + 'comment_count': int, + 'dislike_count': int, + 'timestamp': 1612564932, + 'duration': 8173, + 'categories': list, + 'tags': list, }, }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', From 6887f872085891b139e022234ab17473d6bfb2d8 Mon Sep 17 00:00:00 2001 From: scrat5h <118751590+scrat5h@users.noreply.github.com> Date: Mon, 21 Nov 2022 15:38:09 +0000 Subject: [PATCH 3/5] [extractor/pornhub] URL to subtitles is relative The `closedCaptionsFile` field on the page is not an absolute URL (anymore?) but a relative one. This change makes subtitle extraction work again. 
--- yt_dlp/extractor/pornhub.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index e228ec6b1..620ca85f6 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -11,6 +11,7 @@ from ..compat import compat_HTTPError, compat_str from ..utils import ( NO_DEFAULT, ExtractorError, + base_url, clean_html, determine_ext, format_field, @@ -23,6 +24,7 @@ from ..utils import ( update_url_query, url_or_none, urlencode_postdata, + urljoin, ) @@ -325,10 +327,10 @@ class PornHubIE(PornHubBaseIE): r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: - subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) + subtitle_url = flashvars.get('closedCaptionsFile') if subtitle_url: subtitles.setdefault('en', []).append({ - 'url': subtitle_url, + 'url': urljoin(base_url(url), subtitle_url), 'ext': 'srt', }) thumbnail = flashvars.get('image_url') From 9ecbbcd844c0e26e7325004994add692fd381b05 Mon Sep 17 00:00:00 2001 From: scrat5h <118751590+scrat5h@users.noreply.github.com> Date: Tue, 22 Nov 2022 10:50:47 +0100 Subject: [PATCH 4/5] [extractor/pornhub] Remove fetch of /video/get_media for formats Previously, URLs that contained `/video/get_media` returned JSON with the available formats. Some time ago Pornhub seems to have removed this endpoint and started to return `HTTP Error 403: Forbidden`, see #4298. Nowadays it seems to serve the originally requested HTML page which, of course, cannot be parsed with a JSON parser. yt-dlp then produces a WARNING: ``` Failed to parse JSON: Expecting value in '': line 1 column 1 (char 0). ``` Since we are already getting the formats of the video by other means (`mpd` or `m3u8`), this change removes the fetching of URLs that no longer provide any value to us. 
Fixes: #5615 --- yt_dlp/extractor/pornhub.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 620ca85f6..5dcee8660 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -457,18 +457,6 @@ class PornHubIE(PornHubBaseIE): r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') - if '/video/get_media' in video_url: - medias = self._download_json(video_url, video_id, fatal=False) - if isinstance(medias, list): - for media in medias: - if not isinstance(media, dict): - continue - video_url = url_or_none(media.get('videoUrl')) - if not video_url: - continue - height = int_or_none(media.get('quality')) - add_format(video_url, height) - continue add_format(video_url) model_profile = self._search_json( From df8c550abe52607f88b83c5d859dd1b1d039aec5 Mon Sep 17 00:00:00 2001 From: scrat5h <118751590+scrat5h@users.noreply.github.com> Date: Wed, 23 Nov 2022 12:34:42 +0100 Subject: [PATCH 5/5] Revert "[extractor/pornhub] Remove fetch of /video/get_media for formats" This reverts commit 9ecbbcd844c0e26e7325004994add692fd381b05. 
--- yt_dlp/extractor/pornhub.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 5dcee8660..620ca85f6 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -457,6 +457,18 @@ class PornHubIE(PornHubBaseIE): r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') + if '/video/get_media' in video_url: + medias = self._download_json(video_url, video_id, fatal=False) + if isinstance(medias, list): + for media in medias: + if not isinstance(media, dict): + continue + video_url = url_or_none(media.get('videoUrl')) + if not video_url: + continue + height = int_or_none(media.get('quality')) + add_format(video_url, height) + continue add_format(video_url) model_profile = self._search_json(