[nfl] Add new extractor. (Closes #3815)

2024-11-27 18:41:29 +00:00 · 2014-09-23 00:28:19 +03:00 · 2014-09-23 00:28:19 +03:00 · 632e5684ce
parent 094d42fe44
commit 632e5684ce
2 changed files with 104 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -240,6 +240,7 @@
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -0,0 +1,103 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    remove_end,
 )
 class NFLIE(InfoExtractor):
    """Extractor for videos on nfl.com.

    Matches both ``/videos/<category>/<id>/...`` pages and URLs that carry
    the video in a ``#video=<id>`` fragment. Formats are built by combining
    every usable CDN from the shared player config with every bitrate
    variant listed in the per-video JSON.
    """
    IE_NAME = 'nfl.com'
    # Verbose mode ((?x)) treats a bare '#' as a comment start, hence '\#'.
    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
    # Site-wide player configuration; read below for 'contentURLTemplate'
    # and the 'cdns' mapping.
    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
    _TEST = {
        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
        'info_dict': {
            'id': '0ap3000000398478',
            'ext': 'mp4',
            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
            'upload_date': '20140921',
            'timestamp': 1411337580,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # md5 sum fluctuates
            'skip_download': True,
        },
    }
    def _real_extract(self, url):
        """Return the info dict for the nfl.com video addressed by *url*.

        Downloads the player config, then the per-video JSON located via
        the config's 'contentURLTemplate', and emits one format per
        (CDN, bitrate stream) pair.

        Raises ExtractorError (expected=True) when the player config
        contains no CDN data.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        config = self._download_json(
            self._PLAYER_CONFIG_URL, video_id,
            note='Downloading player config')
        # Check the CDN list before requesting the video JSON: without it no
        # format URL can be built, so the second download would be wasted.
        cdns = config.get('cdns')
        if not cdns:
            raise ExtractorError('Failed to get CDN data', expected=True)
        # Two-step formatting: first splice the template from the config into
        # the host, then fill the template's own {id} placeholder.
        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
        video_data = self._download_json(url_template.format(id=video_id), video_id)
        # The "or" fallbacks also cover keys that are present but null.
        streams = (video_data.get('cdnData') or {}).get('bitrateInfo') or []
        formats = []
        for name, cdn in cdns.items():
            # LimeLight streams don't seem to work
            if cdn.get('name') == 'LIMELIGHT':
                continue
            protocol = cdn.get('protocol')
            host = remove_end(cdn.get('host') or '', '/')
            if not (protocol and host):
                continue
            path_prefix = cdn.get('pathprefix') or ''
            if path_prefix and not path_prefix.endswith('/'):
                path_prefix = '%s/' % path_prefix
            # Everything up to the stream path is constant for this CDN.
            base_url = '%s://%s/%s' % (protocol, host, path_prefix)
            # rtmp CDNs get the lowest preference value, CDNs whose config
            # key contains "prog" the next one, everything else 0.
            if protocol == 'rtmp':
                preference = -2
            elif 'prog' in name.lower():
                preference = -1
            else:
                preference = 0
            for stream in streams:
                path = stream.get('path')
                if not path:
                    continue
                formats.append({
                    'url': '%s%s' % (base_url, path),
                    # 1000 is passed as int_or_none's scale argument
                    # (presumably bit/s -> kbit/s; confirm against the API).
                    'vbr': int_or_none(stream.get('rate', 0), 1000),
                    'preference': preference,
                    'format_note': name,
                })
        self._sort_formats(formats)
        # Take the first available image, trying the size keys from 'xl'
        # down to 'xs'.
        image_paths = video_data.get('imagePaths') or {}
        thumbnail = None
        for size in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = image_paths.get(size)
            if thumbnail:
                break
        return {
            'id': video_id,
            'title': video_data.get('storyHeadline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            # 'posted' is scaled down by 1000 (presumably milliseconds ->
            # seconds; consistent with the timestamp expected in _TEST).
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }