[noovo] Fix extraction (closes #14214)

This commit is contained in:
Sergey M․ 2017-09-15 23:12:19 +07:00
parent 6be44a50ed
commit a4245acef8
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -6,6 +6,7 @@
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
js_to_json,
smuggle_url, smuggle_url,
try_get, try_get,
) )
@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
'timestamp': 1491399228, 'timestamp': 1491399228,
'upload_date': '20170405', 'upload_date': '20170405',
'uploader_id': '618566855001', 'uploader_id': '618566855001',
'creator': 'vtele',
'view_count': int,
'series': 'RPM+', 'series': 'RPM+',
}, },
'params': { 'params': {
@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '5395865725001', 'id': '5395865725001',
'title': 'Épisode 13 : Les retrouvailles', 'title': 'Épisode 13 : Les retrouvailles',
'description': 'md5:336d5ebc5436534e61d16e63ddfca327', 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1492019320, 'timestamp': 1492019320,
'upload_date': '20170412', 'upload_date': '20170412',
'uploader_id': '618566855001', 'uploader_id': '618566855001',
'creator': 'vtele',
'view_count': int,
'series': "L'amour est dans le pré", 'series': "L'amour est dans le pré",
'season_number': 5, 'season_number': 5,
'episode': 'Épisode 13', 'episode': 'Épisode 13',
@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._download_json( webpage = self._download_webpage(url, video_id)
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
video_id)['data']
content = try_get(data, lambda x: x['contents'][0]) bc_url = BrightcoveNewIE._extract_url(self, webpage)
brightcove_id = data.get('brightcoveId') or content['brightcoveId'] data = self._parse_json(
self._search_regex(
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
title = try_get(
data, lambda x: x['video']['nom'],
compat_str) or self._html_search_meta(
'dcterms.Title', webpage, 'title', fatal=True)
description = self._html_search_meta(
('dcterms.Description', 'description'), webpage, 'description')
series = try_get( series = try_get(
data, ( data, lambda x: x['emission']['nom']) or self._search_regex(
lambda x: x['show']['title'], r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
lambda x: x['season']['show']['title']), webpage, 'series', default=None)
compat_str)
episode = None season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
og = data.get('og') season = try_get(season_el, lambda x: x['nom'], compat_str)
if isinstance(og, dict) and og.get('type') == 'video.episode': season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
episode = og.get('title')
video = content or data episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': BrightcoveNewIE.ie_key(), 'ie_key': BrightcoveNewIE.ie_key(),
'url': smuggle_url( 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'title': title,
{'geo_countries': ['CA']}), 'description': description,
'id': brightcove_id,
'title': video.get('title'),
'creator': video.get('source'),
'view_count': int_or_none(video.get('viewsCount')),
'series': series, 'series': series,
'season_number': int_or_none(try_get( 'season': season,
data, lambda x: x['season']['seasonNumber'])), 'season_number': season_number,
'episode': episode, 'episode': episode,
'episode_number': int_or_none(data.get('episodeNumber')), 'episode_number': episode_number,
} }