[makertv] improve extraction

This commit is contained in:
remitamine 2015-12-21 04:24:58 +01:00
parent 78653a33aa
commit 7cb0952474
3 changed files with 22 additions and 7 deletions

View file

@@ -53,6 +53,7 @@
from .snagfilms import SnagFilmsEmbedIE from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@@ -1787,6 +1788,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
if snagfilms_url: if snagfilms_url:
return self.url_result(snagfilms_url) return self.url_result(snagfilms_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds # Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None: if mobj is not None:

View file

@@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
@@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}', r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
@@ -42,7 +44,9 @@ def _real_extract(self, url):
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or '' source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl': if source_type == 'application/vnd.apple.mpegurl':
formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)) m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'): elif source_type.startswith('audio'):
formats.append({ formats.append({
'url': source_url, 'url': source_url,
@@ -57,7 +61,7 @@ def _real_extract(self, url):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_data['mediaid'], 'id': video_id,
'title': video_data['title'], 'title': video_data['title'],
'description': video_data.get('description'), 'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')), 'thumbnail': self._proto_relative_url(video_data.get('image')),

View file

@@ -5,12 +5,12 @@
class MakerTVIE(InfoExtractor): class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = { _TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': { 'info_dict': {
'id': 'brOEcGut', 'id': 'Fh3QgymL9gsc',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review', 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a', 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id') jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform') return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}