From c02ec7d4300d3e2607f48fe73011fd8caa38f90c Mon Sep 17 00:00:00 2001
From: remitamine
Date: Wed, 30 Mar 2016 23:18:31 +0100
Subject: [PATCH] [cnbc] Add new extractor(closes #8012)

---
 youtube_dl/extractor/__init__.py    |  1 +
 youtube_dl/extractor/cnbc.py        | 29 +++++++++++++++++++++++++++++
 youtube_dl/extractor/theplatform.py |  8 ++++----
 3 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 youtube_dl/extractor/cnbc.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 1e4b078a4..bd1f7d293 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -127,6 +127,7 @@
 from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
+from .cnbc import CNBCIE
 from .cnet import CNETIE
 from .cnn import (
     CNNIE,
diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
new file mode 100644
index 000000000..593e459aa
--- /dev/null
+++ b/youtube_dl/extractor/cnbc.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class CNBCIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
+        'md5': '',
+        'info_dict': {
+            'id': '3000503714',
+            'ext': 'mp4',
+            'title': 'Video title goes here',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
+                {'force_smil_url': True}),
+            'id': video_id,
+        }
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 863914299..236c99972 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -82,7 +82,7 @@ def get_metadata(self, path, video_id):
 class ThePlatformIE(ThePlatformBaseIE):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
-           (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
     _TESTS = [{
@@ -170,10 +170,10 @@ def _real_extract(self, url):
         if not provider_id:
             provider_id = 'dJ5BDC'
 
-        path = provider_id
+        path = provider_id + '/'
         if mobj.group('media'):
-            path += '/media'
-        path += '/' + video_id
+            path += mobj.group('media')
+        path += video_id
 
         qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
         if 'guid' in qs_dict: