[murrtube] Fix extractor (#7500)

This commit is contained in:
DrakoCpp 2024-02-19 20:47:49 +00:00
parent 4f04347909
commit 8b0dc0946c
2 changed files with 61 additions and 145 deletions

View file

@ -1143,7 +1143,7 @@
MTVItaliaProgrammaIE,
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
from .murrtube import MurrtubeIE
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (

View file

@ -1,13 +1,8 @@
import functools
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
int_or_none,
try_get,
urlencode_postdata,
extract_attributes
)
@ -15,148 +10,69 @@ class MurrtubeIE(InfoExtractor):
# URL forms accepted by this extractor:
#   * the internal `murrtube:<id>` scheme,
#   * short links `https://murrtube.net/v/<4-char id>`,
#   * watch pages `https://murrtube.net/videos/<slug>-<uuid>`.
# Each named group is defined exactly once (`re` rejects redefinitions).
# The UUID alternative is tried before the 4-character short ID, and the
# trailing lookahead stops the short ID from matching a fragment of a slug
# or of a UUID (e.g. the "2020" in "my-2020-video-<uuid>").
_VALID_URL = r'''(?x)
    (?:
        murrtube:|
        https?://murrtube\.net/v/|
        https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+?)\-
    )
    (?P<id>
        [a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12}|
        [A-Z0-9]{4}
    )
    (?![A-Za-z0-9\-])
    '''
_TEST = {
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
'md5': '169f494812d9a90914b42978e73aa690',
'info_dict': {
'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
'ext': 'mp4',
'title': 'Inferno X Skyler',
'description': 'Humping a very good slutty sheppy (roomate)',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 284,
'uploader': 'Inferno Wolf',
'age_limit': 18,
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
}
}
def _download_gql(self, video_id, op, note=None, fatal=True):
    """POST a GraphQL operation to murrtube.net and return its ``data`` member.

    :param video_id: identifier forwarded to ``_download_json``.
    :param op: JSON-serialisable operation (``operationName``, ``variables``,
        ``query``) sent as the request body.
    :param note: optional progress message forwarded to ``_download_json``.
    :param fatal: forwarded to ``_download_json``.
    :return: the ``data`` member of the decoded response, or ``None`` when the
        response is not a JSON object or carries no ``data``; callers in this
        file already test the result against ``None``.
    """
    result = self._download_json(
        'https://murrtube.net/graphql',
        video_id, note, data=json.dumps(op).encode(), fatal=fatal,
        headers={'Content-Type': 'application/json'})
    # The download helper may hand back a non-dict value (e.g. when a
    # non-fatal request fails); indexing it would raise TypeError instead of
    # giving callers the None they check for.
    if not isinstance(result, dict):
        return None
    return result.get('data')
_TESTS = [
{
"url": "https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0",
"md5": "169f494812d9a90914b42978e73aa690",
"info_dict": {
"id": "ca885d8456b95de529b6723b158032e11115d",
"ext": "mp4",
"title": "Inferno X Skyler",
"description": "Humping a very good slutty sheppy (roomate)",
"uploader": "Inferno Wolf",
"age_limit": 18,
},
},
{
"url": "https://murrtube.net/v/0J2Q",
"md5": "757e53c0795a03d53bb4ca243f851aba",
"info_dict": {
"id": "8442998c52134968d9caa36e473e1a6bac6ca",
"uploader": "Hayel",
"title": "Who's in charge now",
"description": """Fenny sneaked into my bed room and played naughty with one of my plushies. I caught him in the act and wanted to punish him. He thought he was in charge and wanted to use me instead but he wasn't prepared on my butt milking him within just a minute.
Fenny: @fenny_ad (both here and on Twitter)
Hayel on Twitter: https://twitter.com/plushmods""",
"age_limit": 18,
}
}
]
def _real_extract(self, url):
    """Extract a single Murrtube video by scraping its HTML watch page.

    This is the HTML-scraping implementation only: the superseded GraphQL
    lookup that was interleaved with it is dropped, so every info-dict key is
    produced exactly once.

    Steps: pass the age check, download the watch page, read the HLS playlist
    URL from the ``<video>`` tag's ``data-url`` attribute, and take the
    remaining metadata from the page's Open Graph tags.

    :param url: a URL matching ``_VALID_URL``.
    :return: info dict with ``id``, ``title``, ``age_limit``, ``formats``,
        ``description``, ``thumbnail`` and ``uploader``.
    :raises ExtractorError: when the ``<video>`` tag, its playlist URL or the
        title cannot be found.
    """
    # ID taken from the URL; only used to label the page downloads. The
    # returned `id` is derived from the playlist URL further down.
    # NOTE(review): `murrtube:<id>` URLs match _VALID_URL but are downloaded
    # verbatim below, exactly as before - confirm whether they still need
    # to be supported.
    display_id = self._match_id(url)

    # Age gate: fetch the landing page, then POST its hidden form inputs
    # back to /accept_age_check before requesting the watch page.
    # TODO: This part could be smarter (Set and store age cookie?)
    landing_page = self._download_webpage(
        'https://murrtube.net', display_id, note='Getting session token')
    self._download_webpage(
        'https://murrtube.net/accept_age_check', display_id, 'Set age cookie',
        data=urlencode_postdata(self._hidden_inputs(landing_page)))

    video_page = self._download_webpage(url, display_id)

    video_attrs = extract_attributes(self._search_regex(
        r'(<video[^>]+>)', video_page, 'video'))
    # Drop the query string; a missing attribute becomes '' so the lookup
    # below reports a proper extractor error instead of a KeyError.
    playlist = (video_attrs.get('data-url') or '').split('?')[0]
    # The path segment right before /index.m3u8 serves as the video ID.
    video_id = self._search_regex(
        r'^https://storage\.murrtube\.net/murrtube-production/.+/(?P<id>[^/]+)/index\.m3u8',
        playlist, 'video id', group='id')

    formats = self._extract_m3u8_formats(
        playlist, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)

    # The last 11 characters of og:title are sliced off - presumably a
    # fixed site-name suffix; TODO confirm against a live page.
    title = self._html_search_meta(
        'og:title', video_page, display_name='title', fatal=True)[:-11]
    # Optional metadata must not abort the extraction when absent.
    description = self._html_search_meta(
        'og:description', video_page, display_name='description', default=None)
    thumbnail = self._html_search_meta(
        'og:image', video_page, display_name='thumbnail', default=None)
    uploader = self._html_search_regex(
        r'<span class="pl-1 is-size-6 has-text-lighter">(.+?)</span>',
        video_page, 'uploader', default=None)

    return {
        'id': video_id,
        'title': title,
        'age_limit': 18,
        'formats': formats,
        'description': description,
        'thumbnail': thumbnail,
        'uploader': uploader,
    }
class MurrtubeUserIE(MurrtubeIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for a Murrtube user profile page.

    Resolves the profile name to a user ID through the GraphQL helper
    inherited from ``MurrtubeIE`` and then lazily pages through that user's
    media, ``_PAGE_SIZE`` entries per request.
    """

    IE_DESC = 'Murrtube user profile'
    _VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
    _TEST = {
        'url': 'https://murrtube.net/stormy',
        'info_dict': {
            'id': 'stormy',
        },
        'playlist_mincount': 27,
    }
    # Number of media entries requested per GraphQL page.
    _PAGE_SIZE = 10

    def _fetch_page(self, username, user_id, page):
        """Yield ``murrtube:<id>`` URL results for one page of a user's media.

        :param username: profile name, used as the identifier for the request.
        :param user_id: user ID sent as the ``userId`` query variable.
        :param page: zero-based page index.
        :raises ExtractorError: when the response carries no data.
        """
        data = self._download_gql(username, {
            'operationName': 'Media',
            'variables': {
                'limit': self._PAGE_SIZE,
                'offset': page * self._PAGE_SIZE,
                'sort': 'latest',
                'userId': user_id,
            },
            'query': '''\
query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) {
  media(q: $q, sort: $sort, userId: $userId, offset: $offset, limit: $limit) {
    id
    __typename
  }
}'''},
            f'Downloading page {page + 1}')
        if data is None:
            raise ExtractorError(f'Failed to retrieve video list for page {page + 1}')

        # A null/absent `media` list is treated as an empty page, and entries
        # without an ID are skipped rather than aborting the whole playlist.
        for entry in data.get('media') or []:
            media_id = entry.get('id')
            if media_id:
                yield self.url_result(f'murrtube:{media_id}', MurrtubeIE.ie_key())

    def _real_extract(self, url):
        """Return a lazily paged playlist of all media uploaded by a user.

        :raises ExtractorError: when the user lookup returns no data or no user.
        """
        username = self._match_id(url)
        data = self._download_gql(username, {
            'operationName': 'User',
            'variables': {
                'id': username,
            },
            'query': '''\
query User($id: ID!) {
  user(id: $id) {
    id
    __typename
  }
}'''},
            'Downloading user info')
        if data is None:
            raise ExtractorError('Failed to fetch user info')

        # `user` may be null/absent in the response; fail with a clear
        # message instead of an AttributeError on `.get`.
        user = data.get('user')
        if not user:
            raise ExtractorError(f'Unable to find user {username!r}')

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, username, user.get('id')), self._PAGE_SIZE)

        return self.playlist_result(entries, username)