Merge remote-tracking branch 'pulpe/tagesschau'

This commit is contained in:
Philipp Hagemeister 2014-06-07 12:43:31 +02:00
commit ebdb64d605
2 changed files with 83 additions and 0 deletions

View file

@ -266,6 +266,7 @@
from .swrmediathek import SWRMediathekIE from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE

View file

@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TagesschauIE(InfoExtractor):
    """Extractor for single-video pages under tagesschau.de/multimedia/video/."""

    # The numeric id may carry a leading '-' in the URL (see the second test).
    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?\d+)\.html'

    _TESTS = [{
        'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
        'md5': 'bcdeac2194fb296d599ce7929dfa4009',
        'info_dict': {
            'id': '1399128',
            'ext': 'mp4',
            'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
            'description': 'md5:69da3c61275b426426d711bde96463ab',
            'thumbnail': r're:^http:.*\.jpg$',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
        'md5': '8aaa8bf3ae1ca2652309718c03019128',
        'info_dict': {
            'id': '196',
            'ext': 'mp4',
            'title': 'Ukraine-Konflikt: Klitschko in Kiew als B\xfcrgermeister vereidigt',
            'description': 'md5:f22e4af75821d174fa6c977349682691',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    # One-letter quality code found on the player page ->
    # (name used in format_id, relative quality rank; higher is better).
    _RESOLUTIONS = {
        's': ('small', 0),
        'm': ('medium', 1),
        'l': ('large', 2),
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the video page (for the OpenGraph title/description) and
        the matching player page (for media URLs and the thumbnail path).

        Returns a dict with the keys 'id', 'title', 'thumbnail', 'formats'
        and 'description'. 'thumbnail' is None when the player page lists
        no thumbnail image.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The id reported to the user drops the leading '-', while the
        # player page URL below needs the id exactly as it appeared.
        display_id = video_id.lstrip('-')

        webpage = self._download_webpage(url, display_id)
        playerpage = self._download_webpage(
            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
            display_id, 'Downloading player page')

        # Each match is a (media URL, container extension, quality code) tuple.
        medias = re.findall(
            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
            playerpage)

        formats = []
        for media_url, ext, res in medias:
            # Unknown quality codes keep their raw value as the name and
            # get the lowest rank.
            res_name, quality = self._RESOLUTIONS.get(res, (res, 0))
            formats.append({
                'format_id': '%s_%s' % (res_name, ext),
                'url': media_url,
                'quality': quality,
                'ext': ext,
            })
        self._sort_formats(formats)

        # Use the last image path on the player page; a page without one
        # must not abort the extraction, so fall back to no thumbnail.
        thumbnail_paths = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)
        thumbnail = None
        if thumbnail_paths:
            thumbnail = 'http://www.tagesschau.de' + thumbnail_paths[-1]

        # NOTE(review): the description helper presumably returns None when
        # the tag is missing -- guard so .strip() is never called on None.
        description = self._og_search_description(webpage)
        if description is not None:
            description = description.strip()

        return {
            'id': display_id,
            'title': self._og_search_title(webpage).strip(),
            'thumbnail': thumbnail,
            'formats': formats,
            'description': description,
        }