Merge branch 'master' into extract_info_rewrite

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-03-04 22:25:46 +01:00
commit 3370abd509
5 changed files with 40 additions and 85 deletions

View file

@ -41,12 +41,6 @@ def test_youtube_playlist(self):
ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
def test_issue_661(self):
    """Regression test for issue 661: extracting this playlist must yield more than 20 results."""
    downloader = FakeDownloader()
    playlist_ie = YoutubePlaylistIE(downloader)
    playlist_ie.extract('PLMCmkNmxw6Z9eduM7BZjSEh7HiU543Ig0')
    # The downloader collects what the extractor hands to it in `result`.
    collected = len(downloader.result)
    self.assertTrue(collected > 20)
def test_issue_673(self): def test_issue_673(self):
dl = FakeDownloader() dl = FakeDownloader()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)

View file

@ -128,18 +128,6 @@
"file": "0732f586d7.mp4", "file": "0732f586d7.mp4",
"md5": "f647e9e90064b53b6e046e75d0241fbd" "md5": "f647e9e90064b53b6e046e75d0241fbd"
}, },
{
"name": "TweetReel",
"url": "http://tweetreel.com/?77smq",
"file": "77smq.mov",
"md5": "56b4d9ca9de467920f3f99a6d91255d6",
"info_dict": {
"uploader": "itszero",
"uploader_id": "itszero",
"upload_date": "20091225",
"description": "Installing Gentoo Linux on Powerbook G4, it turns out the sleep indicator becomes HDD activity indicator :D"
}
},
{ {
"name": "Steam", "name": "Steam",
"url": "http://store.steampowered.com/video/105600/", "url": "http://store.steampowered.com/video/105600/",

View file

@ -104,7 +104,7 @@ def __init__(self, params):
self.params = params self.params = params
if '%(stitle)s' in self.params['outtmpl']: if '%(stitle)s' in self.params['outtmpl']:
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@staticmethod @staticmethod
def format_bytes(bytes): def format_bytes(bytes):
@ -234,6 +234,18 @@ def trouble(self, message=None, tb=None):
raise DownloadError(message) raise DownloadError(message)
self._download_retcode = 1 self._download_retcode = 1
def report_warning(self, message):
    """Print a warning message to stderr.

    The message is prefixed with 'WARNING:'. The prefix is wrapped in
    ANSI colour escape codes only when stderr is a tty and the platform
    is not Windows; otherwise a plain 'WARNING:' prefix is used.

    message -- the warning text, without the 'WARNING:' prefix.
    """
    # Imported locally so this method does not depend on the module's
    # import block.
    import os

    # The classic Windows console does not interpret ANSI escape
    # sequences and would print them literally, so never colour there.
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def slow_down(self, start_time, byte_counter): def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit.""" """Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None) rate_limit = self.params.get('ratelimit', None)
@ -566,7 +578,7 @@ def post_process(self, filename, ie_info):
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
except (IOError, OSError): except (IOError, OSError):
self.to_stderr(u'WARNING: Unable to remove downloaded video file') self.report_warning(u'Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url): def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename) self.report_destination(filename)
@ -574,7 +586,7 @@ def _download_with_rtmpdump(self, filename, url, player_url, page_url):
# Check for rtmpdump first # Check for rtmpdump first
try: try:
subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError): except (OSError, IOError):
self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
return False return False

View file

@ -308,7 +308,7 @@ def _real_initialize(self):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err: except (IOError, netrc.NetrcParseError) as err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
return return
# Set language # Set language
@ -317,7 +317,7 @@ def _real_initialize(self):
self.report_lang() self.report_lang()
compat_urllib_request.urlopen(request).read() compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
return return
# No authentication to be performed # No authentication to be performed
@ -328,7 +328,7 @@ def _real_initialize(self):
try: try:
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err)) self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return return
galx = None galx = None
@ -372,10 +372,10 @@ def _real_initialize(self):
self.report_login() self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') self._downloader.report_warning(u'unable to log in: bad username or password')
return return
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
return return
# Confirm age # Confirm age
@ -1456,7 +1456,7 @@ def _real_extract(self, query):
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
elif n > self._max_youtube_results: elif n > self._max_youtube_results:
self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
n = self._max_youtube_results n = self._max_youtube_results
self._download_n_results(query, n) self._download_n_results(query, n)
return return
@ -1538,7 +1538,7 @@ def _real_extract(self, query):
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
elif n > self._max_google_results: elif n > self._max_google_results:
self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
n = self._max_google_results n = self._max_google_results
self._download_n_results(query, n) self._download_n_results(query, n)
return return
@ -1622,7 +1622,7 @@ def _real_extract(self, query):
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
elif n > self._max_yahoo_results: elif n > self._max_yahoo_results:
self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
n = self._max_yahoo_results n = self._max_yahoo_results
self._download_n_results(query, n) self._download_n_results(query, n)
return return
@ -2080,7 +2080,7 @@ def _real_initialize(self):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err: except (IOError, netrc.NetrcParseError) as err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
return return
if useremail is None: if useremail is None:
@ -2097,10 +2097,10 @@ def _real_initialize(self):
self.report_login() self.report_login()
login_results = compat_urllib_request.urlopen(request).read() login_results = compat_urllib_request.urlopen(request).read()
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return return
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -2165,6 +2165,17 @@ def _real_extract(self, url):
self._downloader.trouble(u'ERROR: invalid URL: %s' % url) self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return return
urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'):
request = compat_urllib_request.Request(url)
response = compat_urllib_request.urlopen(request)
redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl)
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
url = 'http://blip.tv/a/a-' + file_id
return self._real_extract(url)
if '?' in url: if '?' in url:
cchar = '&' cchar = '&'
else: else:
@ -3580,55 +3591,6 @@ def _real_extract(self, url):
} }
return [info] return [info]
class TweetReelIE(InfoExtractor):
    """Information extractor for tweetreel.com video pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?tweetreel\.com/[?](?P<id>[0-9a-z]+)$'

    def _real_extract(self, url):
        """Extract the video information for a tweetreel.com URL.

        Returns a one-element list holding the info dictionary, or None
        when the URL is invalid or a required field cannot be found in
        the downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # Each failed search below must return after reporting:
        # trouble() can hand control back to the caller (the invalid-URL
        # branch above already relies on that), and continuing would call
        # .group() on None and raise AttributeError.
        m = re.search(r'<div id="left" status_id="([0-9]+)">', webpage)
        if not m:
            self._downloader.trouble(u'ERROR: Cannot find status ID')
            return
        status_id = m.group(1)

        m = re.search(r'<div class="tweet_text">(.*?)</div>', webpage, flags=re.DOTALL)
        if not m:
            self._downloader.trouble(u'WARNING: Cannot find description')
            return
        # Drop embedded links from the tweet text before unescaping it.
        desc = unescapeHTML(re.sub('<a.*?</a>', '', m.group(1))).strip()

        m = re.search(r'<div class="tweet_info">.*?from <a target="_blank" href="https?://twitter.com/(?P<uploader_id>.+?)">(?P<uploader>.+?)</a>', webpage, flags=re.DOTALL)
        if not m:
            self._downloader.trouble(u'ERROR: Cannot find uploader')
            return
        uploader = unescapeHTML(m.group('uploader'))
        uploader_id = unescapeHTML(m.group('uploader_id'))

        m = re.search(r'<span unixtime="([0-9]+)"', webpage)
        if not m:
            self._downloader.trouble(u'ERROR: Cannot find upload date')
            return
        # NOTE(review): fromtimestamp() converts using the local timezone,
        # so the resulting date may differ between machines - confirm
        # whether UTC was intended.
        upload_date = datetime.datetime.fromtimestamp(int(m.group(1))).strftime('%Y%m%d')

        # The page offers no separate title; the description doubles as one.
        title = desc
        video_url = 'http://files.tweetreel.com/video/' + status_id + '.mov'

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mov',
            'title': title,
            'description': desc,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'internal_id': status_id,
            'upload_date': upload_date
        }
        return [info]
class SteamIE(InfoExtractor): class SteamIE(InfoExtractor):
_VALID_URL = r"""http://store.steampowered.com/ _VALID_URL = r"""http://store.steampowered.com/
(?P<urltype>video|app)/ #If the page is only for videos or for a game (?P<urltype>video|app)/ #If the page is only for videos or for a game
@ -3767,7 +3729,7 @@ def _real_extract(self, url):
# Get the video date # Get the video date
result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage) result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
if result is None: if result is None:
self._downloader.to_stderr(u'WARNING: unable to extract video date') self._downloader.report_warning(u'unable to extract video date')
upload_date = None upload_date = None
else: else:
upload_date = result.group('date').strip() upload_date = result.group('date').strip()
@ -3775,7 +3737,7 @@ def _real_extract(self, url):
# Get the video uploader # Get the video uploader
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
if result is None: if result is None:
self._downloader.to_stderr(u'WARNING: unable to extract uploader') self._downloader.report_warning(u'unable to extract uploader')
video_uploader = None video_uploader = None
else: else:
video_uploader = result.group('uploader').strip() video_uploader = result.group('uploader').strip()
@ -4173,7 +4135,6 @@ def gen_extractors():
NBAIE(), NBAIE(),
JustinTVIE(), JustinTVIE(),
FunnyOrDieIE(), FunnyOrDieIE(),
TweetReelIE(),
SteamIE(), SteamIE(),
UstreamIE(), UstreamIE(),
RBMARadioIE(), RBMARadioIE(),

View file

@ -126,7 +126,7 @@ def _find_term_columns():
general.add_option('-i', '--ignore-errors', general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
general.add_option('-r', '--rate-limit', general.add_option('-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries', general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
general.add_option('--buffer-size', general.add_option('--buffer-size',