[extractor/moview] Add extractor (#4607)

Authored by: HobbyistDev
This commit is contained in:
HobbyistDev 2022-08-15 05:09:05 +09:00 committed by GitHub
parent cb7cc448c0
commit 7695f5a0a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 44 deletions

View file

@ -975,6 +975,7 @@
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE from .movieclips import MovieClipsIE
from .moviepilot import MoviepilotIE from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE
from .movingimage import MovingImageIE from .movingimage import MovingImageIE
from .msn import MSNIE from .msn import MSNIE

51
yt_dlp/extractor/jixie.py Normal file
View file

@ -0,0 +1,51 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# More information about the Jixie player:
# [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js
class JixieBaseIE(InfoExtractor):
    """Base extractor holding the extraction logic shared by Jixie-backed sites."""

    def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
        """Query the Jixie stream API and build the info dict for one video.

        display_id: identifier passed to the download helpers and returned
            as 'display_id'.
        video_id: value sent as the 'video_id' query parameter and returned
            as 'id'.
        webpage: HTML of the embedding page; its meta tags are the fallback
            for 'title' and 'description' when the API response lacks them.

        Returns a dict with id, display_id, formats, subtitles, title,
        description, thumbnails, duration, tags, categories and uploader_id.
        """
        json_data = self._download_json(
            'https://apidam.jixie.io/api/public/stream', display_id,
            query={'metadata': 'full', 'video_id': video_id})['data']

        formats, subtitles = [], {}
        for stream in json_data['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                # Without a URL there is nothing to download: it cannot be
                # handed to the m3u8 extractor and a format entry needs a URL.
                continue

            if stream.get('type') == 'HLS':
                fmt, sub = self._extract_m3u8_formats_and_subtitles(stream_url, display_id, ext='mp4')
                if json_data.get('drm'):
                    # The 'drm' flag is applied to the HLS formats only;
                    # direct (non-HLS) streams below are left unmarked.
                    for f in fmt:
                        f['has_drm'] = True
                formats.extend(fmt)
                self._merge_subtitles(sub, target=subtitles)
            else:
                formats.append({
                    'url': stream_url,
                    'width': stream.get('width'),
                    'height': stream.get('height'),
                    'ext': 'mp4',
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
            'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
            'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
            # `or None` makes a falsy value (e.g. '') fail on .split instead of
            # yielding ['']; presumably try_call turns that failure into None.
            'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
            'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
            'uploader_id': json_data.get('owner_id'),
        }

View file

@ -1,17 +1,9 @@
from .common import InfoExtractor from .jixie import JixieBaseIE
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# Video from www.kompas.tv and video.kompas.com seems use jixie player # Video from video.kompas.com seems use jixie player
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
class KompasVideoIE(InfoExtractor): class KompasVideoIE(JixieBaseIE):
_VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)' _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel', 'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
@ -33,36 +25,4 @@ def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug') video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
json_data = self._download_json( return self._extract_data_from_jixie_id(display_id, video_id, webpage)
'https://apidam.jixie.io/api/public/stream', display_id,
query={'metadata': 'full', 'video_id': video_id})['data']
formats, subtitles = [], {}
for stream in json_data['streams']:
if stream.get('type') == 'HLS':
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
formats.extend(fmt)
self._merge_subtitles(sub, target=subtitles)
else:
formats.append({
'url': stream.get('url'),
'width': stream.get('width'),
'height': stream.get('height'),
'ext': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
'uploader_id': json_data.get('owner_id'),
}

View file

@ -0,0 +1,43 @@
from .jixie import JixieBaseIE
class MoviewPlayIE(JixieBaseIE):
    """Extractor for www.moview.id play pages, delegating to the Jixie base class."""

    _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
    _TESTS = [{
        # drm hls, only use direct link
        'url': 'https://www.moview.id/play/174/Candy-Monster',
        'info_dict': {
            'id': '146182',
            'ext': 'mp4',
            'display_id': 'Candy-Monster',
            'uploader_id': 'Mo165qXUUf',
            'duration': 528.2,
            'title': 'Candy Monster',
            'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
            'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
        },
    }, {
        # non-drm hls
        'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
        'info_dict': {
            'id': '28210',
            'ext': 'mp4',
            'duration': 2595.666667,
            'display_id': 'Paris-Van-Java-Episode-16',
            'uploader_id': 'Mo165qXUUf',
            'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
            'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
            'title': 'Paris Van Java Episode 16',
        },
    }]

    def _real_extract(self, url):
        """Read the video id embedded in the play page and hand off to the base class."""
        # The URL slug is the display id; the id sent to the API comes from the page.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The page assigns the id as `video_id = "<value>"`.
        jixie_id = self._search_regex(
            r'video_id\s*=\s*"(?P<video_id>[^"]+)', page, 'video_id')

        return self._extract_data_from_jixie_id(slug, jixie_id, page)