mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-09-28 21:57:57 +00:00
[murrtube] Fix extractor (#7500)
This commit is contained in:
parent
4f04347909
commit
8b0dc0946c
|
@ -1143,7 +1143,7 @@
|
|||
MTVItaliaProgrammaIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .murrtube import MurrtubeIE, MurrtubeUserIE
|
||||
from .murrtube import MurrtubeIE
|
||||
from .museai import MuseAIIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
|
|
|
@ -1,13 +1,8 @@
|
|||
import functools
|
||||
import json
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
extract_attributes
|
||||
)
|
||||
|
||||
|
||||
|
@ -15,148 +10,69 @@ class MurrtubeIE(InfoExtractor):
|
|||
# Accepted inputs:
#   * the internal ``murrtube:<id>`` form,
#   * short links  https://murrtube.net/v/<4-character code>,
#   * slug links   https://murrtube.net/videos/<slug>-<uuid>.
# ``slug`` is matched lazily so that the trailing UUID lands in ``id``.
# Each named group may appear only once in a pattern; the superseded
# ``videos/`` alternative and the UUID-only ``id`` group are therefore dropped.
_VALID_URL = r'''(?x)
    (?:
        murrtube:|
        https?://murrtube\.net/v/|
        https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+?)\-
    )
    (?P<id>[A-Z0-9]{4}|[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
    '''
|
||||
# Single download test case keyed on the UUID that appears in the URL.
# NOTE(review): a `_TESTS` list is defined further down in this file as well;
# presumably only one of `_TEST` / `_TESTS` should remain - confirm.
# NOTE(review): the expected 'id' here is the UUID from the URL - verify that
# it still agrees with what `_real_extract` returns.
_TEST = {
    'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
    'md5': '169f494812d9a90914b42978e73aa690',
    'info_dict': {
        'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
        'ext': 'mp4',
        'title': 'Inferno X Skyler',
        'description': 'Humping a very good slutty sheppy (roomate)',
        'thumbnail': r're:^https?://.*\.jpg$',
        'duration': 284,
        'uploader': 'Inferno Wolf',
        'age_limit': 18,
        # Bare types mean "any value of this type" for volatile counters.
        'comment_count': int,
        'view_count': int,
        'like_count': int,
        'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
    }
}
|
||||
|
||||
def _download_gql(self, video_id, op, note=None, fatal=True):
    """POST a GraphQL operation to murrtube.net and return its ``data`` member.

    ``op`` is the operation as a JSON-serialisable dict; ``video_id`` and
    ``note`` are forwarded to ``_download_json`` unchanged.

    Returns ``None`` when ``_download_json`` yields nothing usable (for
    instance a failed request with ``fatal=False``), so that callers can
    test ``data is None`` instead of hitting an error on the subscript.
    """
    result = self._download_json(
        'https://murrtube.net/graphql',
        video_id, note, data=json.dumps(op).encode(), fatal=fatal,
        headers={'Content-Type': 'application/json'})
    if not result:
        return None
    return result['data']
|
||||
# Download test cases. The expected 'id' values are not the identifiers that
# appear in the URLs.
# NOTE(review): they look like the storage key taken from the HLS playlist
# URL - confirm against `_real_extract`.
_TESTS = [
    {
        'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
        'md5': '169f494812d9a90914b42978e73aa690',
        'info_dict': {
            'id': 'ca885d8456b95de529b6723b158032e11115d',
            'ext': 'mp4',
            'title': 'Inferno X Skyler',
            'description': 'Humping a very good slutty sheppy (roomate)',
            'uploader': 'Inferno Wolf',
            'age_limit': 18,
        },
    },
    {
        'url': 'https://murrtube.net/v/0J2Q',
        'md5': '757e53c0795a03d53bb4ca243f851aba',
        'info_dict': {
            'id': '8442998c52134968d9caa36e473e1a6bac6ca',
            'uploader': 'Hayel',
            'title': 'Who\'s in charge now?',
            # Written with explicit newlines so that the expected text does
            # not depend on how this literal is laid out in the file.
            'description': (
                'Fenny sneaked into my bed room and played naughty with one of my plushies. '
                'I caught him in the act and wanted to punish him. '
                'He thought he was in charge and wanted to use me instead '
                'but he wasn\'t prepared on my butt milking him within just a minute.\n'
                '\n'
                'Fenny: @fenny_ad (both here and on Twitter)\n'
                'Hayel on Twitter: https://twitter.com/plushmods'
            ),
            'age_limit': 18,
        }
    }
]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single Murrtube video by scraping its HTML page.

    Steps: pass the site's age check, download the video page, read the HLS
    playlist URL from the ``data-url`` attribute of the ``<video>`` tag and
    take title, description and thumbnail from the Open Graph meta tags.

    Returns an info dict whose ``id`` is the path segment that precedes
    ``index.m3u8`` in the playlist URL, not the code or UUID found in ``url``.

    Raises ExtractorError for ``murrtube:`` references and when the
    ``<video>`` tag, the playlist id or a required meta tag is missing.
    """
    if url.startswith('murrtube:'):
        # Everything below works on the downloaded web page, and a bare
        # ``murrtube:<id>`` reference is not a URL that can be fetched.
        raise ExtractorError('Support for murrtube: prefix URLs is broken')

    # Identifier from the URL; used only to label the requests below.
    display_id = self._match_id(url)

    # TODO: This part could be smarter (Set and store age cookie?)
    homepage = self._download_webpage(
        'https://murrtube.net', display_id, note='Getting session token')
    # Replay the hidden form fields of the landing page to the age-check
    # endpoint so that the following page request is served.
    self._download_webpage(
        'https://murrtube.net/accept_age_check', display_id, 'Set age cookie',
        data=urlencode_postdata(self._hidden_inputs(homepage)))

    video_page = self._download_webpage(url, display_id)
    video_attrs = extract_attributes(
        self._search_regex(r'(<video[^>]+>)', video_page, 'video'))
    # Drop the query string; only the bare playlist URL is used.
    playlist = video_attrs['data-url'].split('?')[0]
    # _search_regex reports a missing match as an extraction error instead
    # of failing on a ``None`` match object.
    video_id = self._search_regex(
        r'https://storage\.murrtube\.net/murrtube-production/.+/(?P<id>[^/]+)/index\.m3u8',
        playlist, 'video id', group='id')
    formats = self._extract_m3u8_formats(
        playlist, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)

    title = self._html_search_meta(
        'og:title', video_page, display_name='title', fatal=True)
    # The page title carries a trailing site name (11 characters); strip it
    # only when it is really there rather than cutting blindly.
    site_suffix = ' - Murrtube'
    if title.endswith(site_suffix):
        title = title[:-len(site_suffix)]
    description = self._html_search_meta(
        'og:description', video_page, display_name='description', fatal=True)
    thumbnail = self._html_search_meta(
        'og:image', video_page, display_name='thumbnail', fatal=True)
    # The uploader is optional: it is scraped from page markup, not metadata.
    uploader = self._html_search_regex(
        r'<span class="pl-1 is-size-6 has-text-lighter">(.+?)</span>',
        video_page, 'uploader', default=None)

    return {
        'id': video_id,
        'title': title,
        'age_limit': 18,
        'formats': formats,
        'description': description,
        'thumbnail': thumbnail,
        'uploader': uploader,
    }
|
||||
|
||||
|
||||
class MurrtubeUserIE(MurrtubeIE):  # XXX: Do not subclass from concrete IE
    # Playlist extractor: lists every upload of one user profile through the
    # site's GraphQL endpoint and hands each entry to MurrtubeIE.
    IE_DESC = 'Murrtube user profile'
    _VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
    _TEST = {
        'url': 'https://murrtube.net/stormy',
        'info_dict': {
            'id': 'stormy',
        },
        'playlist_mincount': 27,
    }
    # Number of entries requested per GraphQL page.
    _PAGE_SIZE = 10

    def _fetch_page(self, username, user_id, page):
        """Yield url_result entries for one zero-based ``page`` of uploads.

        ``username`` only labels the request; ``user_id`` selects the user in
        the query. Raises ExtractorError when the response carries no data.
        """
        data = self._download_gql(username, {
            'operationName': 'Media',
            'variables': {
                'limit': self._PAGE_SIZE,
                'offset': page * self._PAGE_SIZE,
                'sort': 'latest',
                'userId': user_id,
            },
            'query': '''\
query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) {
  media(q: $q, sort: $sort, userId: $userId, offset: $offset, limit: $limit) {
    id
    __typename
  }
}'''},
            f'Downloading page {page + 1}')
        if data is None:
            raise ExtractorError(f'Failed to retrieve video list for page {page + 1}')

        # A missing or null list is treated as an empty page.
        for entry in data.get('media') or []:
            yield self.url_result(f'murrtube:{entry["id"]}', MurrtubeIE.ie_key())

    def _real_extract(self, url):
        """Resolve the profile name in ``url`` to a user id and return a lazy playlist.

        Raises ExtractorError when the user lookup returns no data or no user.
        """
        username = self._match_id(url)
        data = self._download_gql(username, {
            'operationName': 'User',
            'variables': {
                'id': username,
            },
            'query': '''\
query User($id: ID!) {
  user(id: $id) {
    id
    __typename
  }
}'''},
            'Downloading user info')
        if data is None:
            raise ExtractorError('Failed to fetch user info')

        # Guard against a null/missing user object before reading its id.
        user = data.get('user')
        if not user:
            raise ExtractorError('Failed to fetch user info')

        # Pages are only requested when the playlist is actually iterated.
        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, username, user.get('id')), self._PAGE_SIZE)

        return self.playlist_result(entries, username)
|
||||
|
|
Loading…
Reference in a new issue