[ign] add support for pcmag and extract all formats and more metadata

This commit is contained in:
remitamine 2015-10-02 21:58:20 +01:00
parent 59a9efe85b
commit adccf33632
2 changed files with 93 additions and 30 deletions

View file

@ -231,7 +231,11 @@
from .huffpost import HuffPostIE from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE from .iconosquare import IconosquareIE
from .ign import IGNIE, OneUPIE from .ign import (
IGNIE,
OneUPIE,
PCMagIE,
)
from .imdb import ( from .imdb import (
ImdbIE, ImdbIE,
ImdbListIE ImdbListIE

View file

@ -3,6 +3,10 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class IGNIE(InfoExtractor): class IGNIE(InfoExtractor):
@ -11,25 +15,23 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' _API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
_DESCRIPTION_RE = [ _EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
r'<span class="page-object-description">(.+?)</span>',
r'id="my_show_video">.*?<p>(.*?)</p>',
r'<meta name="description" content="(.*?)"',
]
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
'md5': 'eac8bdc1890980122c3b66f14bdd02e9', 'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
'info_dict': { 'info_dict': {
'id': '8f862beef863986b2785559b9e1aa599', 'id': '8f862beef863986b2785559b9e1aa599',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Last of Us Review', 'title': 'The Last of Us Review',
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
'timestamp': 1370440800,
'upload_date': '20130605',
} }
}, },
{ {
@ -44,6 +46,8 @@ class IGNIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'GTA 5 Video Review', 'title': 'GTA 5 Video Review',
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
}, },
}, },
{ {
@ -52,6 +56,8 @@ class IGNIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion', 'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.', 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
}, },
}, },
], ],
@ -66,10 +72,9 @@ class IGNIE(InfoExtractor):
'id': '078fdd005f6d3c02f63d795faa1b984f', 'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': ( 'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
'Giant skeletons, bloody hunts, and captivating' 'timestamp': 1408047180,
' natural beauty take our breath away.' 'upload_date': '20140814',
),
}, },
}, },
] ]
@ -82,7 +87,7 @@ def _find_video_id(self, webpage):
r'<object id="vid_(.+?)"', r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
] ]
return self._search_regex(res_id, webpage, 'video id') return self._search_regex(res_id, webpage, 'video id', default=None)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -102,22 +107,45 @@ def _real_extract(self, url):
} }
video_id = self._find_video_id(webpage) video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id) if not video_id:
description = self._html_search_regex(self._DESCRIPTION_RE, return self.url_result(self._search_regex(self._EMBED_RE, webpage, 'embed url'))
webpage, 'video description', flags=re.DOTALL) return self._get_video_info(video_id)
result['description'] = description
return result
def _get_video_info(self, video_id):
    """Build the info dict for a video from the IGN video API.

    Downloads the JSON document at ``_API_URL_TEMPLATE % video_id`` and
    turns it into an info dict with every available format, the
    thumbnails and the descriptive metadata.

    Formats come from three places: the optional ``m3uUrl`` and
    ``f4mUrl`` manifest references under ``refs``, and one entry per
    item in ``assets``.

    Raises KeyError if the response lacks ``refs``, ``assets``,
    ``thumbnails`` or ``metadata``, or if an asset or thumbnail has no
    ``url`` (assumes the API response is a JSON object).
    """
    api_data = self._download_json(
        self._API_URL_TEMPLATE % video_id, video_id)
    refs = api_data['refs']

    formats = []
    # Manifest references are optional; only expand the ones present.
    m3u8_url = refs.get('m3uUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(m3u8_url, video_id))
    f4m_url = refs.get('f4mUrl')
    if f4m_url:
        formats.extend(self._extract_f4m_formats(f4m_url, video_id))
    # Directly downloadable renditions.
    for asset in api_data['assets']:
        formats.append({
            'url': asset['url'],
            'tbr': asset.get('actual_bitrate_kbps'),
            'fps': asset.get('frame_rate'),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        })
    self._sort_formats(formats)

    thumbnails = [
        {'url': thumbnail['url']}
        for thumbnail in api_data['thumbnails']
    ]

    metadata = api_data['metadata']

    # Title fallback chain: longTitle -> name -> title.  The last lookup
    # must be a call; subscripting the bound method (``metadata.get[...]``)
    # raises TypeError as soon as the first two candidates are empty.
    title = (
        metadata.get('longTitle')
        or metadata.get('name')
        or metadata.get('title'))

    return {
        'id': api_data.get('videoId') or video_id,
        'title': title,
        'description': metadata.get('description'),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'display_id': metadata.get('slug') or video_id,
        'thumbnails': thumbnails,
        'formats': formats,
    }
@ -125,16 +153,16 @@ class OneUPIE(IGNIE):
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
IE_NAME = '1up.com' IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
_TESTS = [{ _TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976.html', 'url': 'http://gamevideos.1up.com/video/id/34976.html',
'md5': '68a54ce4ebc772e4b71e3123d413163d', 'md5': 'c9cc69e07acb675c31a16719f909e347',
'info_dict': { 'info_dict': {
'id': '34976', 'id': '34976',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sniper Elite V2 - Trailer', 'title': 'Sniper Elite V2 - Trailer',
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', 'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
'timestamp': 1313099220,
'upload_date': '20110811',
} }
}] }]
@ -143,3 +171,34 @@ def _real_extract(self, url):
result = super(OneUPIE, self)._real_extract(url) result = super(OneUPIE, self)._real_extract(url)
result['id'] = mobj.group('name_or_id') result['id'] = mobj.group('name_or_id')
return result return result
class PCMagIE(IGNIE):
    # Extractor for pcmag.com video and article pages.  Only the URL
    # pattern, the extractor name, the embed pattern and the tests are
    # overridden here; everything else is inherited from IGNIE.
    IE_NAME = 'pcmag'

    # 'type' and 'name_or_id' are the same group names the IGNIE and OneUPIE
    # patterns define.
    _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'

    # Captures the 'url=' query value of the widgets.ign.com embed that the
    # page assigns to an iframe 'src' from script.
    _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'

    _TESTS = [
        {
            # Page under /videos/
            'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
            'md5': '212d6154fd0361a2781075f1febbe9ad',
            'info_dict': {
                'id': 'ee10d774b508c9b8ec07e763b9125b91',
                'ext': 'mp4',
                'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
                'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
                'timestamp': 1420571160,
                'upload_date': '20150106',
            },
        },
        {
            # Page under /article2/
            'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
            'md5': '94130c1ca07ba0adb6088350681f16c1',
            'info_dict': {
                'id': '042e560ba94823d43afcb12ddf7142ca',
                'ext': 'mp4',
                'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
                'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
                'timestamp': 1412953920,
                'upload_date': '20141010',
            },
        },
    ]