Merge pull request #7322 from remitamine/vgtv

[vgtv] extract videos from FTV, Aftenposten, Aftonbladet using VGTVIE
This commit is contained in:
remitamine 2015-12-22 16:10:04 +01:00
commit 31d9ea4a3e
4 changed files with 122 additions and 82 deletions

View file

@ -15,7 +15,6 @@
AdobeTVVideoIE, AdobeTVVideoIE,
) )
from .adultswim import AdultSwimIE from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE

View file

@ -1,23 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class AftenpostenIE(InfoExtractor):
    """Extractor for aftenposten.no web TV video pages.

    The numeric video id is captured from the page URL and extraction is
    handed over to the ``Xstream`` extractor via an ``xstream:ap:<id>`` URL.
    """

    # Matches both the hash-bang form (/webtv/#!/video/<id>) and the plain
    # form (/webtv/video/<id>); the digits are captured as ``id``.
    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
        'md5': 'fd828cd29774a729bf4d4425fe192972',
        'info_dict': {
            'id': '21039',
            'ext': 'mov',
            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
            'timestamp': 1416927969,
            'upload_date': '20141125',
        }
    }

    def _real_extract(self, url):
        """Return a url_result that delegates *url* to the Xstream extractor."""
        video_id = self._match_id(url)
        # 'ap' is the partner segment used in the xstream: pseudo-URL.
        xstream_url = 'xstream:ap:%s' % video_id
        return self.url_result(xstream_url, 'Xstream')

View file

@ -4,26 +4,48 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .xstream import XstreamIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
) )
class VGTVIE(InfoExtractor): class VGTVIE(XstreamIE):
IE_DESC = 'VGTV and BTTV' IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
'bt.no/tv': 'bttv',
'aftenbladet.no/tv': 'satv',
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {
'vgtv': 'vgtv',
'bttv': 'bt',
'satv': 'sa',
'fvntv': 'fvn',
'aptv': 'ap',
}
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:https?://(?:www\.)?
vgtv:| (?P<host>
http://(?:www\.)? %s
) )
(?P<host>vgtv|bt) /
(?: (?:
:| \#!/(?:video|live)/|
\.no/(?:tv/)?\#!/(?:video|live)/ embed?.*id=
) )|
(?P<id>[0-9]+) (?P<appname>
''' %s
):)
(?P<id>\d+)
''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
_TESTS = [ _TESTS = [
{ {
# streamType: vod # streamType: vod
@ -59,17 +81,18 @@ class VGTVIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Video is no longer available',
}, },
{ {
# streamType: live # streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': { 'info_dict': {
'id': '113063', 'id': '113063',
'ext': 'flv', 'ext': 'mp4',
'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231', 'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'duration': 0, 'duration': 25966,
'timestamp': 1432975582, 'timestamp': 1432975582,
'upload_date': '20150530', 'upload_date': '20150530',
'view_count': int, 'view_count': int,
@ -79,6 +102,20 @@ class VGTVIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'duration': 66,
'timestamp': 1417002452,
'upload_date': '20141126',
'view_count': int,
}
},
{ {
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien', 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True, 'only_matching': True,
@ -89,21 +126,27 @@ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
host = mobj.group('host') host = mobj.group('host')
appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
HOST_WEBSITES = { vendor = self._APP_NAME_TO_VENDOR[appname]
'vgtv': 'vgtv',
'bt': 'bttv',
}
data = self._download_json( data = self._download_json(
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website' 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
% (host, video_id, HOST_WEBSITES[host]), % (vendor, video_id, appname),
video_id, 'Downloading media JSON') video_id, 'Downloading media JSON')
if data.get('status') == 'inactive': if data.get('status') == 'inactive':
raise ExtractorError( raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True) 'Video %s is no longer available' % video_id, expected=True)
info = {
'formats': [],
}
if len(video_id) == 5:
if appname == 'bttv':
info = self._extract_video_info('btno', video_id)
elif appname == 'aptv':
info = self._extract_video_info('ap', video_id)
streams = data['streamUrls'] streams = data['streamUrls']
stream_type = data.get('streamType') stream_type = data.get('streamType')
@ -111,48 +154,53 @@ def _real_extract(self, url):
hls_url = streams.get('hls') hls_url = streams.get('hls')
if hls_url: if hls_url:
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
hls_url, video_id, 'mp4', m3u8_id='hls')) hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
hds_url = streams.get('hds') hds_url = streams.get('hds')
# wasLive hds are always 404 # wasLive hds are always 404
if hds_url and stream_type != 'wasLive': if hds_url and stream_type != 'wasLive':
formats.extend(self._extract_f4m_formats( f4m_formats = self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
video_id, f4m_id='hds')) if f4m_formats:
formats.extend(f4m_formats)
mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4') mp4_url = streams.get('mp4')
if mp4_url: if mp4_url:
_url = hls_url or hds_url mp4_urls.append(mp4_url)
MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1]) for mp4_url in mp4_urls:
for mp4_format in _url.split(','): format_info = {
m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format) 'url': mp4_url,
if not m: }
continue mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
width = int(m.group('width')) if mobj:
height = int(m.group('height')) tbr = int(mobj.group(3))
vbr = int(m.group('vbr')) format_info.update({
formats.append({ 'width': int(mobj.group(1)),
'url': MP4_URL_TEMPLATE % mp4_format, 'height': int(mobj.group(2)),
'format_id': 'mp4-%s' % vbr, 'tbr': tbr,
'width': width, 'format_id': 'mp4-%s' % tbr,
'height': height,
'vbr': vbr,
'preference': 1,
}) })
self._sort_formats(formats) formats.append(format_info)
return { info['formats'].extend(formats)
self._sort_formats(info['formats'])
info.update({
'id': video_id, 'id': video_id,
'title': self._live_title(data['title']), 'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
'description': data['description'], 'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80', 'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'], 'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000), 'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'], 'view_count': data['displays'],
'formats': formats,
'is_live': True if stream_type == 'live' else False, 'is_live': True if stream_type == 'live' else False,
} })
return info
class BTArticleIE(InfoExtractor): class BTArticleIE(InfoExtractor):
@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
_TEST = { _TEST = {
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
'md5': 'd055e8ee918ef2844745fcfd1a4175fb', 'md5': '2acbe8ad129b3469d5ae51b1158878df',
'info_dict': { 'info_dict': {
'id': '23199', 'id': '23199',
'ext': 'mp4', 'ext': 'mp4',
@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
webpage = self._download_webpage(url, self._match_id(url)) webpage = self._download_webpage(url, self._match_id(url))
video_id = self._search_regex( video_id = self._search_regex(
r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id') r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
return self.url_result('vgtv:bt:%s' % video_id, 'VGTV') return self.url_result('bttv:%s' % video_id, 'VGTV')
class BTVestlendingenIE(InfoExtractor): class BTVestlendingenIE(InfoExtractor):
IE_NAME = 'bt:vestlendingen' IE_NAME = 'bt:vestlendingen'
IE_DESC = 'Bergens Tidende - Vestlendingen' IE_DESC = 'Bergens Tidende - Vestlendingen'
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
'md5': 'd7d17e3337dc80de6d3a540aefbe441b', 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
'info_dict': { 'info_dict': {
@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
'timestamp': 1430473209, 'timestamp': 1430473209,
'upload_date': '20150501', 'upload_date': '20150501',
}, },
} 'skip': '404 Error',
}, {
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
'info_dict': {
'id': '86255',
'ext': 'mov',
'title': 'Du må tåle å fryse og være sulten',
'description': 'md5:b8046f4d022d5830ddab04865791d063',
'upload_date': '20150321',
'timestamp': 1426942023,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream') return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')

View file

@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _extract_video_info(self, partner_id, video_id):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
data = self._download_xml( data = self._download_xml(
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s' 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
% (partner_id, video_id), % (partner_id, video_id),
@ -97,6 +93,7 @@ def _real_extract(self, url):
formats.append({ formats.append({
'url': link.get('href'), 'url': link.get('href'),
'format_id': link.get('rel'), 'format_id': link.get('rel'),
'preference': 1,
}) })
thumbnails = [{ thumbnails = [{
@ -113,3 +110,10 @@ def _real_extract(self, url):
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
} }
def _real_extract(self, url):
    """Extract the video identified by *url*.

    The ``partner_id`` and ``id`` named groups of ``_VALID_URL`` are read
    from the URL and passed on to ``_extract_video_info``, whose result is
    returned unchanged.
    """
    # Match.group() with several names returns the values as a tuple.
    partner_id, video_id = re.match(
        self._VALID_URL, url).group('partner_id', 'id')
    return self._extract_video_info(partner_id, video_id)