Merge remote-tracking branch 'origin/master'

This commit is contained in:
Philipp Hagemeister 2015-02-16 04:09:10 +01:00
commit 5bfd430f81
16 changed files with 163 additions and 48 deletions

View file

@ -110,3 +110,4 @@ Shaya Goldberg
Paul Hartmann
Frans de Jonge
Robin de Rooij
Ryan Schmidt

View file

@ -138,7 +138,7 @@ def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
self.assertTrue(len(subtitles.keys()) >= 6)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
@ -247,7 +247,7 @@ def test_no_writesubtitles(self):
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
@ -334,7 +334,7 @@ def test_allsubtitles(self):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs']))
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')

View file

@ -189,6 +189,7 @@
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE
from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE

View file

@ -50,7 +50,7 @@ def _real_extract(self, url):
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
'uploader_id': info['uid'],
'uploader_id': info['owner']['uid'],
}

View file

@ -273,7 +273,7 @@ def _download_playlist(self, playlist_id):
formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise
# fallback to legacy playlist

View file

@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
'md5': '634526ae978711f6b748fe0dd6c11f57',
'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': {
'id': '5416503',
'ext': 'mp4',

View file

@ -16,7 +16,7 @@
class CamdemyIE(InfoExtractor):
_VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)'
_VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
_TESTS = [{
# single file
'url': 'http://www.camdemy.com/media/5181/',

View file

@ -665,7 +665,7 @@ def _media_rating_search(self, html):
return RATING_TABLE.get(rating.lower(), None)
def _family_friendly_search(self, html):
# See http://schema.org/VideoObj
# See http://schema.org/VideoObject
family_friendly = self._html_search_meta('isFamilyFriendly', html)
if not family_friendly:

View file

@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
'title': 'Hot Perky Blonde Naked Golf',
'title': 'hot perky blonde naked golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
@ -36,7 +36,8 @@ def _real_extract(self, url):
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',

View file

@ -1,52 +1,71 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class FirstTVIE(InfoExtractor):
IE_NAME = 'firsttv'
IE_DESC = 'Видеоархив - Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
IE_NAME = '1tv'
IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TEST = {
_TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390',
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': {
'id': '73390',
'ext': 'mp4',
'title': 'Олимпийские канатные дороги',
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149,
'like_count': int,
'dislike_count': int,
},
'skip': 'Only works from Russia',
}
}, {
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
'info_dict': {
'id': '35930',
'ext': 'mp4',
'title': 'Наедине со всеми. Людмила Сенчина',
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 2694,
},
'skip': 'Only works from Russia',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex(
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
webpage, 'video URL')
title = self._html_search_regex(
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
duration = self._og_search_property(
'video:duration', webpage,
'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', fatal=False)
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', fatal=False)
like_count = self._html_search_regex(
r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', default=None)
dislike_count = self._html_search_regex(
r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', default=None)
return {
'id': video_id,

View file

@ -0,0 +1,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class HistoryIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
_TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
'info_dict': {
'id': 'bLx5Dv5Aka1G',
'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
},
'add_ie': ['ThePlatform'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
webpage, 'video url')
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))

View file

@ -1,7 +1,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
@ -52,9 +51,9 @@ def _real_extract(self, url):
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
((video/.+?/(?P<id>\d+))|
(feature/[^/]+/(?P<title>.+)))
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
'''
_TESTS = [
@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
},
},
{
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
'info_dict': {
'id': 'sekXqyTVnmN3',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
},
},
]
def _real_extract(self, url):
@ -107,13 +116,13 @@ def _real_extract(self, url):
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:
# "feature" pages use theplatform.com
# "feature" and "nightly-news" pages use theplatform.com
title = mobj.group('title')
webpage = self._download_webpage(url, title)
bootstrap_json = self._search_regex(
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
flags=re.MULTILINE)
bootstrap = json.loads(bootstrap_json)
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
webpage, 'bootstrap json', flags=re.MULTILINE)
bootstrap = self._parse_json(bootstrap_json, video_id)
info = bootstrap['results'][0]['video']
mpxid = info['mpxId']

View file

@ -1,14 +1,30 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import hashlib
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
)
def _get_api_key(api_path):
if api_path.endswith('?'):
api_path = api_path[:-1]
api_key = 'fb5f58a820353bd7095de526253c14fd'
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
return hashlib.md5(a.encode('ascii')).hexdigest()
class StreamCZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
_API_URL = 'http://www.stream.cz/API'
_TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.stream.cz/API/episode/%s' % video_id, video_id)
api_path = '/episode/%s' % video_id
req = compat_urllib_request.Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id)
formats = []
for quality, video in enumerate(data['video_qualities']):

View file

@ -52,7 +52,7 @@ def _real_extract(self, url):
formats = []
quality = qualities(['mp4', 'flv'])
for video_url in re.findall(r'<source src="([^"]+)"', webpage):
for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
video_ext = determine_ext(video_url)
formats.append({
'url': video_url,

View file

@ -2,6 +2,11 @@
import re
import json
import time
import hmac
import binascii
import hashlib
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
@ -11,6 +16,7 @@
determine_ext,
ExtractorError,
xpath_with_ns,
unsmuggle_url,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@ -18,7 +24,7 @@
class ThePlatformIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor):
},
}
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
flags = '10' if include_qs else '00'
expiration_date = '%x' % (int(time.time()) + life)
def str_to_hex(str):
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
def hex_to_str(hex):
return binascii.a2b_hex(hex)
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
provider_id = mobj.group('provider_id')
video_id = mobj.group('id')
if not provider_id:
provider_id = 'dJ5BDC'
if mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
@ -48,8 +78,12 @@ def _real_extract(self, url):
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
'format=smil&mbr=true'.format(provider_id, video_id))
sig = smuggled_data.get('sig')
if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id)
try:
@ -62,7 +96,7 @@ def _real_extract(self, url):
else:
raise ExtractorError(error_msg, expected=True)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)

View file

@ -138,7 +138,7 @@ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
@ -178,8 +178,8 @@ def get_audio_codec(self, path):
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None