Merge remote-tracking branch 'upstream/master'

This commit is contained in:
rupertbaxter2 2014-08-13 04:22:45 -07:00
commit f96252b913
15 changed files with 165 additions and 81 deletions

View file

@ -99,6 +99,7 @@ def test_youtube_extract(self):
def test_facebook_matching(self): def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
def test_no_duplicates(self): def test_no_duplicates(self):
ies = gen_extractors() ies = gen_extractors()

View file

@ -280,7 +280,7 @@ def test_strip_jsonp(self):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}]) self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escpae(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(u''), u'') self.assertEqual(uppercase_escape(u''), u'')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

View file

@ -68,6 +68,7 @@
'Hassaan Ali', 'Hassaan Ali',
'Dobrosław Żybort', 'Dobrosław Żybort',
'David Fabijan', 'David Fabijan',
'Sebastian Haas',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'

View file

@ -295,7 +295,7 @@ def download(self, filename, info_dict):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses.""" """Real download process. Redefine in subclasses."""
raise NotImplementedError(u'This method must be implemented by sublcasses') raise NotImplementedError(u'This method must be implemented by subclasses')
def _hook_progress(self, status): def _hook_progress(self, status):
for ph in self._progress_hooks: for ph in self._progress_hooks:

View file

@ -225,9 +225,12 @@
from .ntv import NTVIE from .ntv import NTVIE
from .nytimes import NYTimesIE from .nytimes import NYTimesIE
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
)
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE

View file

@ -6,6 +6,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse, compat_urlparse,
int_or_none,
) )
@ -110,8 +111,8 @@ def _clean_json(m):
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format': format['type'], 'format': format['type'],
'width': format['width'], 'width': int_or_none(format['width']),
'height': int(format['height']), 'height': int_or_none(format['height']),
}) })
self._sort_formats(formats) self._sort_formats(formats)

View file

@ -51,6 +51,9 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
title = self._html_search_regex( title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>', r'<meta name="dcterms.title" content="(.*?)"/>',

View file

@ -109,15 +109,19 @@ def _match_lang(f):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes) return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url # Some formats may not be in the same language as the url
# TODO: Might want not to drop videos that does not match requested language
# but to process those formats with lower precedence
formats = filter(_match_lang, all_formats) formats = filter(_match_lang, all_formats)
formats = list(formats) # in python3 filter returns an iterator formats = list(formats) # in python3 filter returns an iterator
if not formats: if not formats:
# Some videos are only available in the 'Originalversion' # Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German # they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): # Sometimes there are neither videos of requested lang code
formats = all_formats # nor original version videos available
else: # For such cases we just take all_formats as is
raise ExtractorError(u'The formats list is empty') formats = all_formats
if not formats:
raise ExtractorError('The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
def sort_key(f): def sort_key(f):

View file

@ -20,7 +20,7 @@
class FacebookIE(InfoExtractor): class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:\w+\.)?facebook\.com/ https?://(?:\w+\.)?facebook\.com/
(?:[^#?]*\#!/)? (?:[^#]*?\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?) (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+) (?:v|video_id)=(?P<id>[0-9]+)
(?:.*)''' (?:.*)'''

View file

@ -1,40 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import calendar
import datetime
import re
from .common import InfoExtractor
# audios on oe1.orf.at are only available for 7 days, so we can't
# add tests.
class OE1IE(InfoExtractor):
IE_DESC = 'oe1.orf.at'
_VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_id = mobj.group('id')
data = self._download_json(
'http://oe1.orf.at/programm/%s/konsole' % show_id,
show_id
)
timestamp = datetime.datetime.strptime('%s %s' % (
data['item']['day_label'],
data['item']['time']
), '%d.%m.%Y %H:%M')
unix_timestamp = calendar.timegm(timestamp.utctimetuple())
return {
'id': show_id,
'title': data['item']['title'],
'url': data['item']['url_stream'],
'ext': 'mp3',
'description': data['item'].get('info'),
'timestamp': unix_timestamp
}

View file

@ -3,23 +3,38 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unescapeHTML from ..utils import (
unescapeHTML,
ExtractorError,
)
class OoyalaIE(InfoExtractor): class OoyalaIE(InfoExtractor):
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
_TEST = { _TESTS = [
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video {
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
'info_dict': { 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', 'info_dict': {
'ext': 'mp4', 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
'title': 'Explaining Data Recovery from Hard Drives and SSDs', 'ext': 'mp4',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
},
}, {
# Only available for ipad
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'md5': '4b9754921fddb68106e48c142e2a01e6',
'info_dict': {
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation',
'description': '',
},
}, },
} ]
@staticmethod @staticmethod
def _url_for_embed_code(embed_code): def _url_for_embed_code(embed_code):
@ -47,13 +62,30 @@ def _real_extract(self, url):
player = self._download_webpage(player_url, embedCode) player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, 'mobile player url') player, 'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode) # Looks like some videos are only available for particular devices
videos_info = self._search_regex( # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', # is only available for ipad)
mobile_player, 'info').replace('\\"','"') # Working around with fetching URLs for all the devices found starting with 'unknown'
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"') # until we succeed or eventually fail for each device.
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
devices.remove('unknown')
devices.insert(0, 'unknown')
for device in devices:
mobile_player = self._download_webpage(
'%s&device=%s' % (mobile_url, device), embedCode,
'Downloading mobile player JS for %s device' % device)
videos_info = self._search_regex(
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
mobile_player, 'info', fatal=False, default=None)
if videos_info:
break
if not videos_info:
raise ExtractorError('Unable to extract info')
videos_info = videos_info.replace('\\"', '"')
videos_more_info = self._search_regex(
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
videos_info = json.loads(videos_info) videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info) videos_more_info = json.loads(videos_more_info)
if videos_more_info.get('lineup'): if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]

View file

@ -3,6 +3,8 @@
import json import json
import re import re
import calendar
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -12,7 +14,9 @@
) )
class ORFIE(InfoExtractor): class ORFTVthekIE(InfoExtractor):
IE_NAME = 'orf:tvthek'
IE_DESC = 'ORF TVthek'
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
_TEST = { _TEST = {
@ -105,3 +109,73 @@ def quality_to_int(s):
'entries': entries, 'entries': entries,
'id': playlist_id, 'id': playlist_id,
} }
# Audios on ORF radio are only available for 7 days, so we can't add tests.
class ORFOE1IE(InfoExtractor):
IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_id = mobj.group('id')
data = self._download_json(
'http://oe1.orf.at/programm/%s/konsole' % show_id,
show_id
)
timestamp = datetime.datetime.strptime('%s %s' % (
data['item']['day_label'],
data['item']['time']
), '%d.%m.%Y %H:%M')
unix_timestamp = calendar.timegm(timestamp.utctimetuple())
return {
'id': show_id,
'title': data['item']['title'],
'url': data['item']['url_stream'],
'ext': 'mp3',
'description': data['item'].get('info'),
'timestamp': unix_timestamp
}
class ORFFM4IE(InfoExtractor):
IE_DESC = 'orf:fm4'
IE_DESC = 'radio FM4'
_VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_date = mobj.group('date')
show_id = mobj.group('show')
data = self._download_json(
'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
show_id
)
def extract_entry_dict(info, title, subtitle):
return {
'id': info['loopStreamId'].replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
'title': title,
'description': subtitle,
'duration': (info['end'] - info['start']) / 1000,
'timestamp': info['start'] / 1000,
'ext': 'mp3'
}
entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
return {
'_type': 'playlist',
'id': show_id,
'title': data['title'],
'description': data['subtitle'],
'entries': entries
}

View file

@ -1,23 +1,23 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import strip_jsonp from ..utils import str_or_none
class ReverbNationIE(InfoExtractor): class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'file': '16965047.mp3',
'md5': '3da12ebca28c67c111a7f8b262d3f7a7', 'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
'info_dict': { 'info_dict': {
"id": "16965047",
"ext": "mp3",
"title": "MONA LISA", "title": "MONA LISA",
"uploader": "ALKILADOS", "uploader": "ALKILADOS",
"uploader_id": 216429, "uploader_id": "216429",
"thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
}, },
}] }]
@ -26,10 +26,8 @@ def _real_extract(self, url):
song_id = mobj.group('id') song_id = mobj.group('id')
api_res = self._download_json( api_res = self._download_json(
'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' 'https://api.reverbnation.com/song/%s' % song_id,
% (song_id, int(time.time() * 1000)),
song_id, song_id,
transform_source=strip_jsonp,
note='Downloading information of song %s' % song_id note='Downloading information of song %s' % song_id
) )
@ -38,8 +36,9 @@ def _real_extract(self, url):
'title': api_res.get('name'), 'title': api_res.get('name'),
'url': api_res.get('url'), 'url': api_res.get('url'),
'uploader': api_res.get('artist', {}).get('name'), 'uploader': api_res.get('artist', {}).get('name'),
'uploader_id': api_res.get('artist', {}).get('id'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
'thumbnail': api_res.get('image', api_res.get('thumbnail')), 'thumbnail': self._proto_relative_url(
api_res.get('image', api_res.get('thumbnail'))),
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none', 'vcodec': 'none',
} }

View file

@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr: if get_attr:
if v is not None: if v is not None:
v = getattr(v, get_attr, None) v = getattr(v, get_attr, None)
if v == '':
v = None
return default if v is None else (int(v) * invscale // scale) return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
return default if v is None else compat_str(v)
def str_to_int(int_str): def str_to_int(int_str):
if int_str is None: if int_str is None:
return None return None

View file

@ -1,2 +1,2 @@
__version__ = '2014.08.05' __version__ = '2014.08.10'