[ie/medici] Fix extractor (#9518)

Closes #8813
Authored by: Offert4324
This commit is contained in:
Offert4324 2024-04-05 01:45:19 +09:00 committed by GitHub
parent 0ae16ceb18
commit 4cd9e251b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 134 additions and 64 deletions

View file

@ -2104,22 +2104,6 @@ class GenericIE(InfoExtractor):
'age_limit': 0, 'age_limit': 0,
}, },
}, },
{
'note': 'JW Player embed with unicode-escape sequences in URL',
'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
'info_dict': {
'id': 'm',
'ext': 'mp4',
'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
'description': 'Mahler\'s ',
'uploader': 'www.medici.tv',
'age_limit': 0,
'thumbnail': r're:^https?://.+\.jpg',
},
'params': {
'skip_download': True,
},
},
{ {
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
'md5': 'e2f0a4c329f7986280b7328e24036d60', 'md5': 'e2f0a4c329f7986280b7328e24036d60',

View file

@ -1,67 +1,153 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unified_strdate, filter_dict,
update_url_query, parse_iso8601,
urlencode_postdata, traverse_obj,
try_call,
url_or_none,
) )
class MediciIE(InfoExtractor): class MediciIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)' _VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp', 'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
'md5': '004c21bb0a57248085b6ff3fec72719d', 'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
'info_dict': { 'info_dict': {
'id': '3059', 'id': '8032',
'ext': 'flv', 'ext': 'mp4',
'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson', 'title': 'Thomas Adès\'s The Exterminating Angel',
'description': 'md5:322a1e952bafb725174fd8c1a8212f58', 'description': 'md5:708ae6350dadc604225b4a6e32482bab',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https://.+/.+\.jpg',
'upload_date': '20170408', 'upload_date': '20240304',
'timestamp': 1709561766,
'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
}, },
} 'expected_warnings': [r'preview'],
}, {
'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
'md5': '4ef3f4079a6e1c617584463a9eb84f99',
'info_dict': {
'id': '7900',
'ext': 'mp4',
'title': 'Wagner\'s Lohengrin',
'description': 'md5:a384a62937866101f86902f21752cd89',
'thumbnail': r're:https://.+/.+\.jpg',
'upload_date': '20231017',
'timestamp': 1697554771,
'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
'md5': '9dd757e53b22b2511e85ea9ea60e4815',
'info_dict': {
'id': '5712',
'ext': 'mp4',
'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:9411fe44c874bb10e9af288c65816e41',
'upload_date': '20200323',
'timestamp': 1584975600,
'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
'md5': '40f5e76cb701a97a6d7ba23b62c49990',
'info_dict': {
'id': '7857',
'ext': 'mp4',
'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:0f15a15611ed748020c769873e10a8bb',
'upload_date': '20240223',
'timestamp': 1708707600,
'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
'md5': '87ff198018ce79a34757ab0dd6f21080',
'info_dict': {
'id': '7513',
'ext': 'mp4',
'title': 'La Sonnambula',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:0caf9109a860fd50cd018df062a67f34',
'upload_date': '20231103',
'timestamp': 1699010830,
'display_id': 'la-sonnambula-liege-2023-documentaire',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
'info_dict': {
'id': '3024',
'ext': 'mp4',
'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
'upload_date': '20150223',
'timestamp': 1424706608,
'display_id': 'yvonne-loriod-olivier-messiaen',
},
'skip': 'Requires authentication; preview starts in the middle',
}, {
'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
'md5': '4cc279a8b06609782747c8f50beea2b3',
'info_dict': {
'id': '7922',
'ext': 'mp4',
'title': 'NEW: Makaya McCraven in La Rochelle',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
'upload_date': '20231228',
'timestamp': 1703754863,
'display_id': 'makaya-mccraven-la-rochelle',
},
'expected_warnings': [r'preview'],
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id, subdomain = self._match_valid_url(url).group('id', 'sub')
self._request_webpage(url, display_id, 'Requesting CSRF token cookie')
# Sets csrftoken cookie subdomain = 'edu-' if subdomain == 'edu' else ''
self._download_webpage(url, video_id) origin = f'https://{urllib.parse.urlparse(url).hostname}'
MEDICI_URL = 'http://www.medici.tv/'
data = self._download_json( data = self._download_json(
MEDICI_URL, video_id, f'https://api.medici.tv/{subdomain}satie/edito/movie-file/{display_id}/', display_id,
data=urlencode_postdata({ headers=filter_dict({
'json': 'true', 'Authorization': try_call(
'page': '/%s' % video_id, lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value)),
'timezone_offset': -420, 'Device-Type': 'web',
}), headers={ 'Origin': origin,
'X-CSRFToken': self._get_cookies(url)['csrftoken'].value, 'Referer': f'{origin}/',
'X-Requested-With': 'XMLHttpRequest', 'Accept': 'application/json, text/plain, */*',
'Referer': MEDICI_URL, }))
'Content-Type': 'application/x-www-form-urlencoded',
})
video = data['video']['videos']['video1'] if not traverse_obj(data, ('video', 'is_full_video')) and traverse_obj(
data, ('video', 'is_limited_by_user_access')):
self.report_warning(
'The full video is for subscribers only. Only previews will be downloaded. If you '
'have used the --cookies-from-browser option, try using the --cookies option instead')
title = video.get('nom') or data['title'] formats, subtitles = self._extract_m3u8_formats_and_subtitles(
data['video']['video_url'], display_id, 'mp4')
video_id = video.get('id') or video_id
formats = self._extract_f4m_formats(
update_url_query(video['url_akamai'], {
'hdcore': '3.1.0',
'plugin=aasp': '3.1.0.43.124',
}), video_id, f4m_id='hds')
description = data.get('meta_description')
thumbnail = video.get('url_thumbnail') or data.get('main_image')
upload_date = unified_strdate(data['video'].get('date'))
return { return {
'id': video_id, 'id': str(data['id']),
'title': title, 'display_id': display_id,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('subtitle', {str}),
'thumbnail': ('picture', {url_or_none}),
'timestamp': ('date_publish', {parse_iso8601}),
}),
} }