From 02e4ebbbad5653b9bbbcf615bdcae6b2c7ea1e30 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 22 Nov 2013 17:19:22 +0100
Subject: [PATCH] [streamcloud] Add IE (Fixes #1801)

---
Review note: this patch text was restored from a copy that had been
collapsed onto two lines and had every angle-bracket span stripped out.
Two things were put back: the group names in _VALID_URL (the code reads
group 'id', so that name is certain) and the middle of _real_extract,
which scrapes the hidden form fields, waits, and re-requests the page
with a POST to obtain `webpage`.
NOTE(review): the second group name ("fname"), the form regex, the
12 second wait and the request headers are reconstructed, not read from
the damaged copy. The restored file is 65 lines, matching the hunk
header, but confirm against the upstream commit before applying.
The author's e-mail address was also lost from the From: header and has
intentionally not been filled in.

 youtube_dl/extractor/__init__.py    |  1 +
 youtube_dl/extractor/streamcloud.py | 65 +++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 youtube_dl/extractor/streamcloud.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 802beef214..02f9e25468 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -128,6 +128,7 @@
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .steam import SteamIE
+from .streamcloud import StreamcloudIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
new file mode 100644
index 0000000000..d476693ec0
--- /dev/null
+++ b/youtube_dl/extractor/streamcloud.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class StreamcloudIE(InfoExtractor):
+    IE_NAME = u'streamcloud.eu'
+    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+
+    _TEST = {
+        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+        u'file': u'skp9j99s4bpz.mp4',
+        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
+        u'info_dict': {
+            u'title': u'youtube-dl test video \'/\\ ä ↭',
+            u'duration': 9,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        orig_webpage = self._download_webpage(url, video_id)
+
+        fields = re.findall(r'''(?x)<input\s+
+            type="(?:hidden|submit)"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', orig_webpage)
+        post = compat_urllib_parse.urlencode(fields)
+
+        self.to_screen('%s: Waiting for timeout' % video_id)
+        time.sleep(12)
+        headers = {
+            b'Content-Type': b'application/x-www-form-urlencoded',
+        }
+        req = compat_urllib_request.Request(url, post, headers)
+
+        webpage = self._download_webpage(
+            req, video_id, note=u'Downloading video page ...')
+        title = self._html_search_regex(
+            r'<h1[^>]*>([^<]+)<', webpage, u'title')
+        video_url = self._search_regex(
+            r'file:\s*"([^"]+)"', webpage, u'video URL')
+        duration_str = self._search_regex(
+            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
+        duration = None if duration_str is None else int(duration_str)
+        thumbnail = self._search_regex(
+            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }