From 7d273a387aade7665cd25eee69d94ee615d9a4b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 16 Sep 2016 19:31:39 +0100 Subject: [PATCH] [mangomolo] add support for Mangomolo embeds --- youtube_dl/extractor/awaan.py | 63 ++++++++++++------------------ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/generic.py | 29 ++++++++++++++ youtube_dl/extractor/mangomolo.py | 54 +++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 youtube_dl/extractor/mangomolo.py diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index bdf23c6a9..66d7515bc 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -50,25 +50,6 @@ def _parse_video_data(self, video_data, video_id, is_live): 'is_live': is_live, } - def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): - formats = [] - format_url_base = 'http' + self._html_search_regex( - [ - r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', - r']+href="rtsp(://[^"]+)"' - ], webpage, 'format url') - formats.extend(self._extract_mpd_formats( - format_url_base + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - format_url_base + '/playlist.m3u8', video_id, 'mp4', - m3u8_entry_protocol, m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - format_url_base + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - return formats - class AWAANVideoIE(AWAANBaseIE): IE_NAME = 'awaan:video' @@ -99,16 +80,18 @@ def _real_extract(self, url): video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' 
+ - compat_urllib_parse_urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), video_id) - info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloVideo', + }) return info @@ -138,16 +121,18 @@ def _real_extract(self, url): channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + - compat_urllib_parse_urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), channel_id) - info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloLive', + }) return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd0579425..4baf4cd48 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -472,6 +472,10 @@ from .mailru import MailRuIE from .makerschannel import MakersChannelIE from .makertv import MakerTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) from .matchtv import MatchTVIE from .mdr import MDRIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2e46ca179..e01305942 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2254,6 +2254,35 @@ def _playlist_from_matches(matches, getter=None, ie=None): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') + # Look for Mangomolo embeds + mobj = re.search( + r'''(?x)]+src=(["\'])(?P(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + (?: + video\?.*?\bid=(?P\d+)| + index\?.*?\bchannelid=(?P(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + ).+?)\1''', webpage) + if mobj is not None: + info = { + '_type': 'url_transparent', + 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } + video_id = mobj.group('video_id') + if video_id: + info.update({ + 'ie_key': 'MangomoloVideo', + 'id': video_id, + }) + else: + info.update({ + 'ie_key': 'MangomoloLive', + 'id': mobj.group('channel_id'), + }) + return info + # Look for Instagram 
embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None:
diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py
new file mode 100644
index 000000000..8cac8ace2
--- /dev/null
+++ b/youtube_dl/extractor/mangomolo.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+    int_or_none,
+)
+
+
+class MangomoloBaseIE(InfoExtractor):
+    """Shared extraction logic for Mangomolo embed pages.
+
+    Subclasses define ``_VALID_URL`` (with an ``id`` group) and ``_IS_LIVE``.
+    """
+
+    def _get_real_id(self, page_id):
+        """Map the id matched in the URL to the id used for the result.
+
+        The base implementation returns ``page_id`` unchanged.
+        """
+        return page_id
+
+    def _real_extract(self, url):
+        """Download the embed page at ``url`` and return its info dict."""
+        page_id = self._get_real_id(self._match_id(url))
+        webpage = self._download_webpage(url, page_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
+
+        # The stream URL is matched either as a `file: ".../playlist.m3u8"`
+        # entry or as the href of an rtsp:// link in the page.
+        format_url = self._html_search_regex(
+            [
+                r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
+                r'<a[^>]+href="(rtsp://[^"]+)"'
+            ], webpage, 'format url')
+        formats = self._extract_wowza_formats(
+            format_url, page_id, m3u8_entry_protocol, ['smil'])
+        self._sort_formats(formats)
+
+        return {
+            'id': page_id,
+            'title': self._live_title(page_id) if self._IS_LIVE else page_id,
+            'uploader_id': hidden_inputs.get('userid'),
+            'duration': int_or_none(hidden_inputs.get('duration')),
+            'is_live': self._IS_LIVE,
+            'formats': formats,
+        }
+
+
+class MangomoloVideoIE(MangomoloBaseIE):
+    """Extractor for Mangomolo on-demand video embeds (numeric ``id``)."""
+
+    IE_NAME = 'mangomolo:video'
+    _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
+    _IS_LIVE = False
+
+
+class MangomoloLiveIE(MangomoloBaseIE):
+    """Extractor for Mangomolo live channel embeds (``channelid`` param)."""
+
+    IE_NAME = 'mangomolo:live'
+    _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+    _IS_LIVE = True
+
+    def _get_real_id(self, page_id):
+        """Return the channel id decoded from its URL-quoted base64 form."""
+        return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()