From 01395a34345d1c6ba1b73ca92f94dd200dc45341 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 12 May 2024 22:12:11 +0200 Subject: [PATCH] [cleanup] Remove questionable extractors (#9911) Closes #6279, Closes #6799 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 10 -- yt_dlp/extractor/cableav.py | 32 ------ yt_dlp/extractor/einthusan.py | 105 ----------------- yt_dlp/extractor/jable.py | 103 ----------------- yt_dlp/extractor/porn91.py | 95 --------------- yt_dlp/extractor/unsupported.py | 14 +++ yt_dlp/extractor/xfileshare.py | 198 -------------------------------- yt_dlp/extractor/yourporn.py | 65 ----------- yt_dlp/extractor/yourupload.py | 43 ------- 9 files changed, 14 insertions(+), 651 deletions(-) delete mode 100644 yt_dlp/extractor/cableav.py delete mode 100644 yt_dlp/extractor/einthusan.py delete mode 100644 yt_dlp/extractor/jable.py delete mode 100644 yt_dlp/extractor/porn91.py delete mode 100644 yt_dlp/extractor/xfileshare.py delete mode 100644 yt_dlp/extractor/yourporn.py delete mode 100644 yt_dlp/extractor/yourupload.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1f095c932..cf408b682 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -288,7 +288,6 @@ from .bundestag import BundestagIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE -from .cableav import CableAVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE @@ -548,7 +547,6 @@ from .egghead import ( EggheadLessonIE, ) from .eighttracks import EightTracksIE -from .einthusan import EinthusanIE from .eitb import EitbIE from .elementorembed import ElementorEmbedIE from .elonet import ElonetIE @@ -861,10 +859,6 @@ from .iwara import ( ) from .ixigua import IxiguaIE from .izlesene import IzleseneIE -from .jable import ( - JableIE, - JablePlaylistIE, -) from .jamendo import ( JamendoIE, JamendoAlbumIE, @@ -1499,7 +1493,6 @@ from .polskieradio import ( ) from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE -from .porn91 import Porn91IE from .pornbox import PornboxIE from .pornflip import PornFlipIE from .pornhub import ( @@ -2377,7 +2370,6 @@ from .wykop import ( ) from .xanimu import XanimuIE from .xboxclips import XboxClipsIE -from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, @@ -2432,8 +2424,6 @@ from .younow import ( YouNowMomentIE, ) from .youporn import YouPornIE -from .yourporn import YourPornIE -from .yourupload import YourUploadIE from .zaiko import ( ZaikoIE, ZaikoETicketIE, diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py deleted file mode 100644 index 4a221414e..000000000 --- a/yt_dlp/extractor/cableav.py +++ /dev/null @@ -1,32 +0,0 @@ -from .common import InfoExtractor - - -class CableAVIE(InfoExtractor): - _VALID_URL = r'https?://cableav\.tv/(?P[a-zA-Z0-9]+)' - _TESTS = [{ - 'url': 'https://cableav.tv/lS4iR9lWjN8/', - 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18', - 'info_dict': { - 'id': 'lS4iR9lWjN8', - 'ext': 'mp4', - 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV', - 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_url = self._og_search_video_url(webpage, secure=False) - - formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') - - return { - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'formats': formats, - } diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py deleted file mode 100644 index 53bc2535d..000000000 --- a/yt_dlp/extractor/einthusan.py +++ /dev/null @@ -1,105 +0,0 @@ -import json - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, - compat_urlparse, -) -from ..utils import ( - extract_attributes, - ExtractorError, - get_elements_by_class, - urlencode_postdata, -) - - -class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com|ca))/movie/watch/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://einthusan.tv/movie/watch/9097/', - 'md5': 'ff0f7f2065031b8a2cf13a933731c035', - 'info_dict': { - 'id': '9097', - 'ext': 'mp4', - 'title': 'Ae Dil Hai Mushkil', - 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, { - 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', - 'only_matching': True, - }, { - 'url': 'https://einthusan.com/movie/watch/9097/', - 'only_matching': True, - }, { - 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi', - 'only_matching': True, - }] - - # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js - def _decrypt(self, encrypted_data, video_id): - return self._parse_json(compat_b64decode(( - encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] - )).decode('utf-8'), video_id) - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - host = mobj.group('host') - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'

([^<]+)

', webpage, 'title') - - player_params = extract_attributes(self._search_regex( - r'(]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters')) - - page_id = self._html_search_regex( - ']+data-pageid="([^"]+)"', webpage, 'page ID') - video_data = self._download_json( - 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, - data=urlencode_postdata({ - 'xEvent': 'UIVideoPlayer.PingOutcome', - 'xJson': json.dumps({ - 'EJOutcomes': player_params['data-ejpingables'], - 'NativeHLS': False - }), - 'arcVersion': 3, - 'appVersion': 59, - 'gorilla.csrf.Token': page_id, - }))['Data'] - - if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'): - raise ExtractorError( - 'Download rate reached. Please try again later.', expected=True) - - ej_links = self._decrypt(video_data['EJLinks'], video_id) - - formats = [] - - m3u8_url = ej_links.get('HLSLink') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) - - mp4_url = ej_links.get('MP4Link') - if mp4_url: - formats.append({ - 'url': mp4_url, - }) - - description = get_elements_by_class('synopsis', webpage)[0] - thumbnail = self._html_search_regex( - r''']+src=(["'])(?P(?!\1).+?/moviecovers/(?!\1).+?)\1''', - webpage, 'thumbnail url', fatal=False, group='url') - if thumbnail is not None: - thumbnail = compat_urlparse.urljoin(url, thumbnail) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, - } diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py deleted file mode 100644 index 71fed49ea..000000000 --- a/yt_dlp/extractor/jable.py +++ /dev/null @@ -1,103 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - InAdvancePagedList, - int_or_none, - orderedSet, - unified_strdate, -) - - -class JableIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P[\w-]+)' - _TESTS = [{ - 'url': 'https://jable.tv/videos/pppd-812/', - 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6', - 'info_dict': { - 'id': 'pppd-812', - 'ext': 'mp4', - 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液', - 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - 'like_count': int, - 'view_count': int, - }, - }, { - 'url': 'https://jable.tv/videos/apak-220/', - 'md5': '71f9239d69ced58ab74a816908847cc1', - 'info_dict': { - 'id': 'apak-220', - 'ext': 'mp4', - 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824', - 'description': '', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - 'like_count': int, - 'view_count': int, - 'upload_date': '20220319', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - formats = self._extract_m3u8_formats( - self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls') - - return { - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage, default=''), - 'thumbnail': self._og_search_thumbnail(webpage, default=None), - 'formats': formats, - 'age_limit': 18, - 'upload_date': unified_strdate(self._search_regex( - r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)), - 'view_count': int_or_none(self._search_regex( - r'#icon-eye">\n*([\d ]+)', - webpage, 'view_count', default='').replace(' ', '')), - 'like_count': int_or_none(self._search_regex( - r'#icon-heart">(\d+)', webpage, 'link_count', default=None)), - } - - -class JablePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P[\w-]+)' - _TESTS = [{ - 'url': 'https://jable.tv/models/kaede-karen/', - 'info_dict': { - 'id': 'kaede-karen', - 'title': '楓カレン', - }, - 'playlist_count': 34, - }, { - 'url': 'https://jable.tv/categories/roleplay/', - 'only_matching': True, - }, { - 'url': 'https://jable.tv/tags/girl/', - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - def page_func(page_num): - return [ - self.url_result(player_url, JableIE) - for player_url in orderedSet(re.findall( - r'href="(https://jable.tv/videos/[\w-]+/?)"', - self._download_webpage(url, playlist_id, query={ - 'mode': 'async', - 'from': page_num + 1, - 'function': 'get_block', - 'block_id': 'list_videos_common_videos_list', - }, note=f'Downloading page {page_num + 1}')))] - - return self.playlist_result( - InAdvancePagedList(page_func, int_or_none(self._search_regex( - r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24), - playlist_id, self._search_regex( - r'

([^<]+)', webpage, 'playlist title', default=None)) diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py deleted file mode 100644 index 7d16a1631..000000000 --- a/yt_dlp/extractor/porn91.py +++ /dev/null @@ -1,95 +0,0 @@ -import urllib.parse -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - parse_duration, - remove_end, - unified_strdate, - ExtractorError, -) - - -class Porn91IE(InfoExtractor): - IE_NAME = '91porn' - _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P\w+)' - - _TESTS = [{ - 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', - 'md5': 'd869db281402e0ef4ddef3c38b866f86', - 'info_dict': { - 'id': '7e42283b4f5ab36da134', - 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', - 'description': 'md5:1ff241f579b07ae936a54e810ad2e891', - 'ext': 'mp4', - 'duration': 431, - 'upload_date': '20150520', - 'comment_count': int, - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c', - 'md5': 'f8fd50540468a6d795378cd778b40226', - 'info_dict': { - 'id': '7ef0cf3d362c699ab91c', - 'title': '真实空乘,冲上云霄第二部', - 'description': 'md5:618bf9652cafcc66cd277bd96789baea', - 'ext': 'mp4', - 'duration': 248, - 'upload_date': '20221119', - 'comment_count': int, - 'view_count': int, - 'age_limit': 18, - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - self._set_cookie('91porn.com', 'language', 'cn_CN') - - webpage = self._download_webpage( - 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id) - - if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage: - raise ExtractorError('91 Porn says: Video does not exist', expected=True) - - daily_limit = self._search_regex( - r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False) - if daily_limit: - raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True) - - video_link_url = self._search_regex( - r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link') - video_link_url = self._search_regex( - r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link') - - formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id) - - return { - 'id': video_id, - 'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(), - 'formats': formats, - 'subtitles': subtitles, - 'upload_date': unified_strdate(self._search_regex( - r'(\d{4}-\d{2}-\d{2})', webpage, 'upload_date', fatal=False)), - 'description': self._html_search_regex( - r'\s*([^<]+)', webpage, 'description', fatal=False), - 'duration': parse_duration(self._search_regex( - r'时长:\s*]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)), - 'comment_count': int_or_none(self._search_regex( - r'留言:\s*]*>\s*(\d+)\s*', webpage, 'comment count', fatal=False)), - 'view_count': int_or_none(self._search_regex( - r'热度:\s*]*>\s*(\d+)\s*', webpage, 'view count', fatal=False)), - 'age_limit': 18, - } - - def _get_formats_and_subtitle(self, video_link_url, video_id): - ext = determine_ext(video_link_url) - if ext == 'm3u8': - formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4') - else: - formats = [{'url': video_link_url, 'ext': ext}] - subtitles = {} - - return formats, subtitles diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 4316c31d2..1e2d118aa 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor): r'filemoon\.sx', r'hentai\.animestigma\.com', r'thisav\.com', + r'gounlimited\.to', + r'highstream\.tv', + r'uqload\.com', + r'vedbam\.xyz', + r'vadbam\.net' + r'vidlo\.us', + r'wolfstream\.tv', + r'xvideosharing\.com', + r'(?:\w+\.)?viidshar\.com', + r'sxyprn\.com', + r'jable\.tv', + r'91porn\.com', + r'einthusan\.(?:tv|com|ca)', + r'yourupload\.com', ) _TESTS = [{ diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py deleted file mode 100644 index 08c6d6c7c..000000000 --- a/yt_dlp/extractor/xfileshare.py +++ /dev/null @@ -1,198 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - decode_packed_codes, - determine_ext, - int_or_none, - js_to_json, - urlencode_postdata, -) - - -# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 -def aa_decode(aa_code): - symbol_table = [ - ('7', '((゚ー゚) + (o^_^o))'), - ('6', '((o^_^o) +(o^_^o))'), - ('5', '((゚ー゚) + (゚Θ゚))'), - ('2', '((o^_^o) - (゚Θ゚))'), - ('4', '(゚ー゚)'), - ('3', '(o^_^o)'), - ('1', '(゚Θ゚)'), - ('0', '(c^_^o)'), - ] - delim = '(゚Д゚)[゚ε゚]+' - ret = '' - for aa_char in aa_code.split(delim): - for val, pat in symbol_table: - aa_char = aa_char.replace(pat, val) - aa_char = aa_char.replace('+ ', '') - m = re.match(r'^\d+', aa_char) - if m: - ret += chr(int(m.group(0), 8)) - else: - m = re.match(r'^u([\da-f]+)', aa_char) - if m: - ret += chr(int(m.group(1), 16)) - return ret - - -class XFileShareIE(InfoExtractor): - _SITES = ( - (r'aparat\.cam', 'Aparat'), - (r'clipwatching\.com', 'ClipWatching'), - (r'gounlimited\.to', 'GoUnlimited'), - (r'govid\.me', 'GoVid'), - (r'holavid\.com', 'HolaVid'), - (r'streamty\.com', 'Streamty'), - (r'thevideobee\.to', 'TheVideoBee'), - (r'uqload\.com', 'Uqload'), - (r'vidbom\.com', 'VidBom'), - (r'vidlo\.us', 'vidlo'), - (r'vidlocker\.xyz', 'VidLocker'), - (r'vidshare\.tv', 'VidShare'), - (r'vup\.to', 'VUp'), - (r'wolfstream\.tv', 'WolfStream'), - (r'xvideosharing\.com', 'XVideoSharing'), - ) - - IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) - _VALID_URL = (r'https?://(?:www\.)?(?P%s)/(?:embed-)?(?P[0-9a-zA-Z]+)' - % '|'.join(site for site in list(zip(*_SITES))[0])) - _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])] - - _FILE_NOT_FOUND_REGEXES = ( - r'>(?:404 - )?File Not Found<', - r'>The file was removed by administrator<', - ) - - _TESTS = [{ - 'url': 'https://uqload.com/dltx1wztngdz', - 'md5': '3cfbb65e4c90e93d7b37bcb65a595557', - 'info_dict': { - 'id': 'dltx1wztngdz', - 'ext': 'mp4', - 'title': 'Rick Astley Never Gonna Give You mp4', - 'thumbnail': r're:https://.*\.jpg' - } - }, { - 'url': 'http://xvideosharing.com/fq65f94nd2ve', - 'md5': '4181f63957e8fe90ac836fa58dc3c8a6', - 'info_dict': { - 'id': 'fq65f94nd2ve', - 'ext': 'mp4', - 'title': 'sample', - 'thumbnail': r're:http://.*\.jpg', - }, - }, { - 'url': 'https://aparat.cam/n4d6dh0wvlpr', - 'only_matching': True, - }, { - 'url': 'https://wolfstream.tv/nthme29v9u2x', - 'only_matching': True, - }] - - def _real_extract(self, url): - host, video_id = self._match_valid_url(url).groups() - - url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) - webpage = self._download_webpage(url, video_id) - - if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - fields = self._hidden_inputs(webpage) - - if fields.get('op') == 'download1': - countdown = int_or_none(self._search_regex( - r'(?:[Ww]ait)?\s*(\d+)\s*(?:seconds?)?', - webpage, 'countdown', default=None)) - if countdown: - self._sleep(countdown, video_id) - - webpage = self._download_webpage( - url, video_id, 'Downloading video page', - data=urlencode_postdata(fields), headers={ - 'Referer': url, - 'Content-type': 'application/x-www-form-urlencoded', - }) - - title = (self._search_regex( - (r'style="z-index: [0-9]+;">([^<]+)', - r'([^<]+)', - r'h4-fine[^>]*>([^<]+)<', - r'>Watch (.+)[ <]', - r'

([^<]+)

', - r'

]*>([^<]+)<', # streamin.to - r'title\s*:\s*"([^"]+)"'), # govid.me - webpage, 'title', default=None) or self._og_search_title( - webpage, default=None) or video_id).strip() - - for regex, func in ( - (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes), - (r'(゚.+)', aa_decode)): - obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None) - if obf_code: - webpage = webpage.replace(obf_code, func(obf_code)) - - formats = [] - - jwplayer_data = self._search_regex( - [ - r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);', - r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);', - ], webpage, - 'jwplayer data', default=None) - if jwplayer_data: - jwplayer_data = self._parse_json( - jwplayer_data.replace(r"\'", "'"), video_id, js_to_json) - if jwplayer_data: - formats = self._parse_jwplayer_data( - jwplayer_data, video_id, False, - m3u8_id='hls', mpd_id='dash')['formats'] - - if not formats: - urls = [] - for regex in ( - r'(?:file|src)\s*:\s*(["\'])(?Phttp(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', - r'file_link\s*=\s*(["\'])(?Phttp(?:(?!\1).)+)\1', - r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?Phttp(?:(?!\2).)+)\2\)', - r']+src=(["\'])(?Phttp(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'): - for mobj in re.finditer(regex, webpage): - video_url = mobj.group('url') - if video_url not in urls: - urls.append(video_url) - - sources = self._search_regex( - r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None) - if sources: - urls.extend(self._parse_json(sources, video_id)) - - formats = [] - for video_url in urls: - if determine_ext(video_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': video_url, - 'format_id': 'sd', - }) - - thumbnail = self._search_regex( - [ - r']+poster="([^"]+)"', - r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],', - ], webpage, 'thumbnail', default=None) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - 'http_headers': {'Referer': url} - } diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py deleted file mode 100644 index 38f42a991..000000000 --- a/yt_dlp/extractor/yourporn.py +++ /dev/null @@ -1,65 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - parse_duration, - urljoin, -) - - -class YourPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P[^/?#&.]+)' - _TESTS = [{ - 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', - 'md5': '6f8682b6464033d87acaa7a8ff0c092e', - 'info_dict': { - 'id': '57ffcb2e1179b', - 'ext': 'mp4', - 'title': 'md5:c9f43630bd968267672651ba905a7d35', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 165, - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - parts = self._parse_json( - self._search_regex( - r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', - group='data'), - video_id)[video_id].split('/') - - num = 0 - for c in parts[6] + parts[7]: - if c.isnumeric(): - num += int(c) - parts[5] = compat_str(int(parts[5]) - num) - parts[1] += '8' - video_url = urljoin(url, '/'.join(parts)) - - title = (self._search_regex( - r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', - default=None) or self._og_search_description(webpage)).strip() - thumbnail = self._og_search_thumbnail(webpage) - duration = parse_duration(self._search_regex( - r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', - default=None)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'age_limit': 18, - 'ext': 'mp4', - } diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py deleted file mode 100644 index def63293a..000000000 --- a/yt_dlp/extractor/yourupload.py +++ /dev/null @@ -1,43 +0,0 @@ -from .common import InfoExtractor -from ..utils import urljoin - - -class YourUploadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P[A-Za-z0-9]+)' - _TESTS = [{ - 'url': 'http://yourupload.com/watch/14i14h', - 'md5': '5e2c63385454c557f97c4c4131a393cd', - 'info_dict': { - 'id': '14i14h', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': r're:^https?://.*\.jpe?g', - } - }, { - 'url': 'http://www.yourupload.com/embed/14i14h', - 'only_matching': True, - }, { - 'url': 'http://embed.yourupload.com/14i14h', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - embed_url = 'http://www.yourupload.com/embed/%s' % video_id - - webpage = self._download_webpage(embed_url, video_id) - - title = self._og_search_title(webpage) - video_url = urljoin(embed_url, self._og_search_video_url(webpage)) - thumbnail = self._og_search_thumbnail(webpage, default=None) - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - 'thumbnail': thumbnail, - 'http_headers': { - 'Referer': embed_url, - }, - }