Update to ytdl-commit-379f52a

[liveleak] Remove extractor
379f52a495
This commit is contained in:
pukkandan 2021-06-23 06:56:37 +05:30
parent 29f63c9672
commit ed807c1837
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
12 changed files with 98 additions and 285 deletions

View file

@ -9,10 +9,10 @@
class AppleConnectIE(InfoExtractor): class AppleConnectIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)' _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
_TEST = { _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'md5': 'e7c38568a01ea45402570e6029206723', 'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': { 'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v', 'ext': 'm4v',
@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710', 'upload_date': '20150710',
'timestamp': 1436545535, 'timestamp': 1436545535,
}, },
} }, {
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -36,7 +39,7 @@ def _real_extract(self, url):
video_data = self._parse_json(video_json, video_id) video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return { return {
'id': video_id, 'id': video_id,

View file

@ -281,7 +281,7 @@ def _real_extract(self, url):
webpage) webpage)
if uploader_mobj: if uploader_mobj:
info.update({ info.update({
'uploader': uploader_mobj.group('name'), 'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'), 'uploader_id': uploader_mobj.group('id'),
}) })

View file

@ -143,9 +143,9 @@ def _real_extract(self, url):
} }
class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE): class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collections' IE_NAME = 'curiositystream:collection'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
_API_BASE_URL = 'https://api.curiositystream.com/v2/collections/' _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
_TESTS = [{ _TESTS = [{
'url': 'https://curiositystream.com/collections/86', 'url': 'https://curiositystream.com/collections/86',
@ -155,6 +155,20 @@ class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.', 'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
}, },
'playlist_mincount': 7, 'playlist_mincount': 7,
}, {
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
'id': '2',
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 16,
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
}, {
'url': 'https://curiositystream.com/collections/36',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -163,25 +177,10 @@ def _real_extract(self, url):
entries = [] entries = []
for media in collection.get('media', []): for media in collection.get('media', []):
media_id = compat_str(media.get('id')) media_id = compat_str(media.get('id'))
media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE) media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
entries.append(self.url_result( entries.append(self.url_result(
'https://curiositystream.com/%s/%s' % (media_type, media_id), 'https://curiositystream.com/%s/%s' % (media_type, media_id),
ie=ie.ie_key(), video_id=media_id)) ie=ie.ie_key(), video_id=media_id))
return self.playlist_result( return self.playlist_result(
entries, collection_id, entries, collection_id,
collection.get('title'), collection.get('description')) collection.get('title'), collection.get('description'))
class CuriosityStreamSeriesIE(CuriosityStreamCollectionsIE):
IE_NAME = 'curiositystream:series'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/series/(?P<id>\d+)'
_API_BASE_URL = 'https://api.curiositystream.com/v2/series/'
_TESTS = [{
'url': 'https://app.curiositystream.com/series/2',
'info_dict': {
'id': '2',
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 16,
}]

View file

@ -22,16 +22,19 @@ def _call_api(self, path, video_id, resource, fatal=True):
class EggheadCourseIE(EggheadBaseIE): class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course' IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course' IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29, 'playlist_count': 29,
'info_dict': { 'info_dict': {
'id': '72', 'id': '432655',
'title': 'Professor Frisby Introduces Composable Functional JavaScript', 'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
}, },
} }, {
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
@ -65,7 +68,7 @@ def _real_extract(self, url):
class EggheadLessonIE(EggheadBaseIE): class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson' IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson' IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)' _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': { 'info_dict': {
@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
}, { }, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -291,8 +291,7 @@
from .cultureunplugged import CultureUnpluggedIE from .cultureunplugged import CultureUnpluggedIE
from .curiositystream import ( from .curiositystream import (
CuriosityStreamIE, CuriosityStreamIE,
CuriosityStreamCollectionsIE, CuriosityStreamCollectionIE,
CuriosityStreamSeriesIE,
) )
from .cwtv import CWTVIE from .cwtv import CWTVIE
from .dailymail import DailyMailIE from .dailymail import DailyMailIE
@ -655,10 +654,6 @@
from .linuxacademy import LinuxAcademyIE from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE from .litv import LiTVIE
from .livejournal import LiveJournalIE from .livejournal import LiveJournalIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
)
from .livestream import ( from .livestream import (
LivestreamIE, LivestreamIE,
LivestreamOriginalIE, LivestreamOriginalIE,

View file

@ -84,7 +84,6 @@
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .arkena import ArkenaIE from .arkena import ArkenaIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
@ -1632,31 +1631,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409', 'upload_date': '20160409',
}, },
}, },
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Another LiveLeak embed pattern (#13336)
{
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
'info_dict': {
'id': '2eb_1496309988',
'ext': 'mp4',
'title': 'Thief robs place where everyone was armed',
'description': 'md5:694d73ee79e535953cf2488562288eee',
'uploader': 'brazilwtf',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs # Duplicated embedded video URLs
{ {
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@ -3204,11 +3178,6 @@ def _real_extract(self, url):
return self.url_result( return self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
# Look for LiveLeak embeds
liveleak_urls = LiveLeakIE._extract_urls(webpage)
if liveleak_urls:
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
# Look for 3Q SDN embeds # Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage) threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url: if threeqsdn_url:

View file

@ -1,191 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
    # Extractor for liveleak.com "view" pages; the video ID is taken from the
    # ``i=`` or ``t=`` query parameter of the URL.
    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'mp4',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Page whose player is a Prochan embed (test is skipped: video is dead)
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        },
        'skip': 'Video is dead',
    }, {
        # Multiple resolutions; covers
        # https://github.com/ytdl-org/youtube-dl/pull/5983
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
        'url': 'http://m.liveleak.com/view?i=763_1473349649',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '763_1473349649',
            'ext': 'mp4',
            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
            'uploader': 'Ziz',
            'upload_date': '20160908',
            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.liveleak.com/view?i=677_1439397581',
        'info_dict': {
            'id': '677_1439397581',
            'title': 'Fuel Depot in China Explosion caught on video',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }, {
        # No original video
        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every LiveLeak ``ll_embed`` iframe URL found in *webpage*."""
        embed_iframe_re = r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"'
        return re.findall(embed_iframe_re, webpage)

    def _real_extract(self, url):
        """Extract a LiveLeak view page.

        Returns a playlist of the HTML5 media entries found on the page, or a
        ``url_transparent`` result pointing at a Prochan/YouTube embed when
        the page has no HTML5 media of its own.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata, gathered in a fixed order before any media parsing.
        title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
        description = self._og_search_description(webpage)
        uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
        age_limit = int_or_none(self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            webpage, 'age limit', default=None))
        thumbnail = self._og_search_thumbnail(webpage)

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # No HTML5 media on the page: it may be wrapping a third-party embed.
            embed_url = self._search_regex(
                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                webpage, 'embed URL')
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'id': video_id,
                'title': title,
                'description': description,
                'uploader': uploader,
                'age_limit': age_limit,
            }

        # Metadata applied to every entry; identical for all of them.
        shared_fields = {
            'title': title,
            'description': description,
            'uploader': uploader,
            'age_limit': age_limit,
            'thumbnail': thumbnail,
        }
        several_entries = len(entries) > 1

        for position, entry in enumerate(entries, 1):
            collected = []
            for fmt in entry['formats']:
                if not fmt.get('height'):
                    # Fall back to the "<N>p.mp4" label in the URL for the height.
                    fmt['height'] = int_or_none(self._search_regex(
                        r'([0-9]+)p\.mp4', fmt['url'], 'height label',
                        default=None))
                collected.append(fmt)

                # Removing '.*.mp4' gives the raw video, which is essentially
                # the same video without the LiveLeak logo at the top (see
                # https://github.com/ytdl-org/youtube-dl/pull/4768)
                raw_url = re.sub(r'\.mp4\.[^.]+', '', fmt['url'])
                if raw_url == fmt['url']:
                    continue
                base_id = fmt.get('format_id')
                raw_id = 'original-' + base_id if base_id else 'original'
                if self._is_valid_url(raw_url, video_id, raw_id):
                    collected.append({
                        'format_id': raw_id,
                        'url': raw_url,
                        'quality': 1,
                    })
            self._sort_formats(collected)
            entry['formats'] = collected

            # Don't append entry ID for one-video pages to keep backward compatibility
            entry['id'] = (
                '%s_%s' % (video_id, position) if several_entries else video_id)
            entry.update(shared_fields)

        return self.playlist_result(entries, video_id, title)
class LiveLeakEmbedIE(InfoExtractor):
    # Extractor for liveleak.com ``ll_embed`` URLs; resolves them to the
    # corresponding "view" page and delegates to LiveLeakIE.
    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve an embed URL to a LiveLeak view URL and hand it to LiveLeakIE."""
        mobj = re.match(self._VALID_URL, url)
        kind = mobj.group('kind')
        video_id = mobj.group('id')

        if kind != 'f':
            # 'i' and 't' embeds carry the same parameter the view page accepts.
            view_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
        else:
            # 'f' embeds do not: the view URL has to be read from the embed page.
            webpage = self._download_webpage(url, video_id)
            view_url = self._search_regex(
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                webpage, 'LiveLeak URL', group='url')

        return self.url_result(view_url, ie=LiveLeakIE.ie_key())

View file

@ -58,7 +58,7 @@ def _raise_error(self, data):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None): def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json( return self._download_json(
urljoin('http://psapi.nrk.no/', path), urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item, video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query, fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'}) headers={'Accept-Encoding': 'gzip, deflate, br'})

View file

@ -98,6 +98,9 @@ def _real_extract(self, url):
elif ext == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False)) src, video_id, f4m_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id=format_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,

View file

@ -31,6 +31,7 @@
class PornHubBaseIE(InfoExtractor): class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub' _NETRC_MACHINE = 'pornhub'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs): def dl(*args, **kwargs):
@ -123,11 +124,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:[^/]+\.)?
%s
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/ (?:www\.)?thumbzilla\.com/video/
) )
(?P<id>[\da-z]+) (?P<id>[\da-z]+)
''' ''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@ -238,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
}, { }, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True, 'only_matching': True,
}, {
# geo restricted
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -277,6 +287,11 @@ def dl_webpage(platform):
'PornHub said: %s' % error_msg, 'PornHub said: %s' % error_msg,
expected=True, video_id=video_id) expected=True, video_id=video_id)
if any(re.search(p, webpage) for p in (
r'class=["\']geoBlocked["\']',
r'>\s*This content is unavailable in your country')):
self.raise_geo_restricted()
# video_title from flashvars contains whitespace instead of non-ASCII (see # video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore. # on that anymore.
@ -410,17 +425,14 @@ def add_format(format_url, height=None):
format_url, video_id, 'mp4', entry_protocol='m3u8_native', format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
return return
tbr = None if not height:
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url) height = int_or_none(self._search_regex(
if mobj: r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
if not height: default=None))
height = int(mobj.group('height'))
tbr = int(mobj.group('tbr'))
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format_id': '%dp' % height if height else None, 'format_id': '%dp' % height if height else None,
'height': height, 'height': height,
'tbr': tbr,
}) })
for video_url, height in video_urls: for video_url, height in video_urls:
@ -442,7 +454,10 @@ def add_format(format_url, height=None):
add_format(video_url, height) add_format(video_url, height)
continue continue
add_format(video_url) add_format(video_url)
self._sort_formats(formats)
# field_preference is unnecessary here, but kept for code-similarity with youtube-dl
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@ -516,7 +531,7 @@ def _extract_entries(self, webpage, host):
class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph', 'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118, 'playlist_mincount': 118,
@ -545,6 +560,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page # Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -620,7 +638,7 @@ def _real_extract(self, url):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True, 'only_matching': True,
@ -725,6 +743,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
}, { }, {
'url': 'https://de.pornhub.com/playlist/4667351', 'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -735,7 +756,7 @@ def suitable(cls, url):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': { 'info_dict': {
@ -745,4 +766,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, { }, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}] }]

View file

@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'https://api.universal-music.de/graphql', 'https://graphql.universal-music.de/',
video_id, query={ video_id, query={
'query': '''{ 'query': '''{
universalMusic(channel:16) { universalMusic(channel:16) {
@ -56,11 +56,9 @@ def _real_extract(self, url):
formats = [] formats = []
def add_m3u8_format(format_id): def add_m3u8_format(format_id):
m3u8_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4', hls_url_template % format_id, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal='False') 'm3u8_native', m3u8_id='hls', fatal=False))
if m3u8_formats and m3u8_formats[0].get('height'):
formats.extend(m3u8_formats)
for f in video_data.get('formats', []): for f in video_data.get('formats', []):
f_url = f.get('url') f_url = f.get('url')

View file

@ -464,20 +464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
r'(?:www\.)?invidious\.pussthecat\.org', r'(?:www\.)?invidious\.pussthecat\.org',
r'(?:www\.)?invidious\.zee\.li', r'(?:www\.)?invidious\.zee\.li',
r'(?:(?:www|au)\.)?ytprivate\.com',
r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.ethibox\.fr', r'(?:www\.)?invidious\.ethibox\.fr',
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion', r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
# youtube-dl invidious instances list # youtube-dl invidious instances list
r'(?:(?:www|no)\.)?invidiou\.sh', r'(?:(?:www|no)\.)?invidiou\.sh',
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
r'(?:www\.)?invidious\.kabi\.tk', r'(?:www\.)?invidious\.kabi\.tk',
r'(?:www\.)?invidious\.mastodon\.host', r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr', r'(?:www\.)?invidious\.zapashcanon\.fr',
r'(?:www\.)?invidious\.kavin\.rocks', r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net', r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud', r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech', r'(?:www\.)?invidious\.reallyancient\.tech',
@ -504,6 +499,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.toot\.koeln', r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr', r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com', r'(?:www\.)?watch\.nettohikari\.com',
r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.silkky\.cloud',
r'(?:www\.)?invidious\.exonip\.de',
r'(?:www\.)?invidious\.riverside\.rocks',
r'(?:www\.)?invidious\.blamefran\.net',
r'(?:www\.)?invidious\.moomoo\.de',
r'(?:www\.)?ytb\.trom\.tf',
r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion', r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion', r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
@ -512,6 +515,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
) )
_VALID_URL = r"""(?x)^ _VALID_URL = r"""(?x)^
( (
@ -1923,9 +1930,9 @@ def get_text(x):
'c': 'WEB_REMIX', 'c': 'WEB_REMIX',
'cver': '0.1', 'cver': '0.1',
'cplayer': 'UNIPLAYER', 'cplayer': 'UNIPLAYER',
}, fatal=False)), }, fatal=False) or ''),
lambda x: x['player_response'][0], lambda x: x['player_response'][0],
compat_str) or '{}', video_id) compat_str) or '{}', video_id, fatal=False)
ytm_streaming_data = ytm_player_response.get('streamingData') or {} ytm_streaming_data = ytm_player_response.get('streamingData') or {}
player_response = None player_response = None