# encoding: utf-8
# Provenance: youtube-dl commit bc0f937b55aae6ce731d259a7658b0281c2e62ec,
# "[tv2] Add extractor (#5724)" (Sergey M., 2015-05-17). That commit created
# youtube_dl/extractor/tv2.py (this module) and registered it in
# youtube_dl/extractor/__init__.py with `from .tv2 import TV2IE`, placed
# between the `.tutv` and `.tv4` imports.
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    float_or_none,
    parse_iso8601,
)


class TV2IE(InfoExtractor):
    """Extractor for tv2.no video pages of the form ``/v/<numeric id>``.

    Formats come from the per-protocol ``play.json`` endpoint on
    sumo.tv2.no; title, description and the other metadata come from the
    ``<id>.json`` asset endpoint on the same host.
    """

    # Raw string so the regex escapes are not treated as string escapes.
    # The group must be named ``id``: it is what ``_match_id`` returns.
    _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tv2.no/v/916509/',
        'md5': '9cb9e3410b18b515d71892f27856e9b1',
        'info_dict': {
            'id': '916509',
            'ext': 'flv',
            'title': 'Se Gryttens hyllest av Steven Gerrard',
            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
            'timestamp': 1431715610,
            'upload_date': '20150515',
            'duration': 156.967,
            'view_count': int,
            'categories': list,
        }
    }

    def _extract_formats(self, video_id):
        """Return the (unsorted) formats offered for ``video_id``.

        The play endpoint is queried once per protocol (HDS, then HLS).
        A URL already seen under an earlier protocol is not added twice,
        and URLs rejected by ``_is_valid_url`` are dropped.
        """
        formats = []
        seen_urls = set()
        for protocol in ('HDS', 'HLS'):
            data = self._download_json(
                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
                video_id, 'Downloading play JSON')['playback']
            # Be tolerant about the shape of the item container: treat a
            # missing/null value as "no items" and a lone object as a
            # one-element list.
            # NOTE(review): the single-object case is defensive; confirm
            # against real API responses.
            items = (data.get('items') or {}).get('item') or []
            if isinstance(items, dict):
                items = [items]
            for item in items:
                video_url = item.get('url')
                if not video_url or video_url in seen_urls:
                    continue
                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                seen_urls.add(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', m3u8_id=format_id))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # ISM manifests are intentionally ignored: this
                    # extractor has no handling for them.
                    continue
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                        'tbr': int_or_none(item.get('bitrate')),
                        'filesize': int_or_none(item.get('fileSize')),
                    })
        return formats

    def _real_extract(self, url):
        """Build the info dict for the tv2.no video addressed by ``url``.

        Raises ``KeyError`` if the metadata response lacks ``asset`` or the
        asset lacks ``title``; every other metadata field is optional.
        """
        video_id = self._match_id(url)

        formats = self._extract_formats(video_id)
        self._sort_formats(formats)

        asset = self._download_json(
            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
            video_id, 'Downloading metadata JSON')['asset']

        title = asset['title']
        description = asset.get('description')
        timestamp = parse_iso8601(asset.get('createTime'))
        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
        view_count = int_or_none(asset.get('views'))

        # ``keywords`` is a comma-separated string; it may be absent or
        # null. Empty entries are dropped so a missing value yields []
        # instead of [''].
        keywords = asset.get('keywords') or ''
        categories = [
            keyword.strip()
            for keyword in keywords.split(',')
            if keyword.strip()]

        # Only the values of ``imageVersions`` are used; entries without a
        # URL are skipped because they are useless as thumbnails.
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail.get('url'),
        } for thumbnail in (asset.get('imageVersions') or {}).values()
            if thumbnail.get('url')]

        # No top-level 'url' on purpose: 'formats' carries the media URLs,
        # and the previous value was just whatever URL the format loop had
        # looked at last (possibly None or a skipped manifest).
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'categories': categories,
            'formats': formats,
        }