From 632e5684ce797eb8a7372eb25dd4ce299f2e66de Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 23 Sep 2014 00:28:19 +0300 Subject: [PATCH] [nfl] Add new extractor. (Closes #3815) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nfl.py | 103 +++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 youtube_dl/extractor/nfl.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 244d22297..1f1fc0eb2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -240,6 +240,7 @@ from .newgrounds import NewgroundsIE from .newstube import NewstubeIE from .nfb import NFBIE +from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE from .ninegag import NineGagIE diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py new file mode 100644 index 000000000..f53596f5e --- /dev/null +++ b/youtube_dl/extractor/nfl.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + remove_end, +) + + +class NFLIE(InfoExtractor): + IE_NAME = 'nfl.com' + _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P\d..[0-9]+)' + _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' + _TEST = { + 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', + 'skip_download': True, # md5 sum fluctuates + 'info_dict': { + 'id': '0ap3000000398478', + 'ext': 'mp4', + 'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights', + 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', + 'upload_date': '20140921', + 'timestamp': 1411337580, + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + config = self._download_json(self._PLAYER_CONFIG_URL, video_id, + note='Downloading player config') + url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config) + video_data = self._download_json(url_template.format(id=video_id), video_id) + + cdns = config.get('cdns') + if not cdns: + raise ExtractorError('Failed to get CDN data', expected=True) + + formats = [] + streams = video_data.get('cdnData', {}).get('bitrateInfo', []) + for name, cdn in cdns.items(): + # LimeLight streams don't seem to work + if cdn.get('name') == 'LIMELIGHT': + continue + + protocol = cdn.get('protocol') + host = remove_end(cdn.get('host', ''), '/') + if not (protocol and host): + continue + + path_prefix = cdn.get('pathprefix', '') + if path_prefix and not path_prefix.endswith('/'): + path_prefix = '%s/' % path_prefix + + get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format( + protocol=protocol, + host=host, + prefix=path_prefix, + path=p, + ) + + if protocol == 'rtmp': + preference = -2 + elif 'prog' in name.lower(): + preference = -1 + else: + preference = 0 + + for stream in streams: + path = stream.get('path') + if not path: + continue + + formats.append({ + 'url': get_url(path), + 'vbr': int_or_none(stream.get('rate', 0), 1000), + 'preference': preference, + 'format_note': name, + }) + + self._sort_formats(formats) + + thumbnail = None + for q in ('xl', 'l', 'm', 's', 'xs'): + thumbnail = video_data.get('imagePaths', {}).get(q) + if thumbnail: + break + + return { + 'id': video_id, + 'title': video_data.get('storyHeadline'), + 'formats': formats, + 'description': video_data.get('caption'), + 'duration': video_data.get('duration'), + 'thumbnail': thumbnail, + 'timestamp': int_or_none(video_data.get('posted'), 1000), + }