From 01ba00ca42899436c13439226ec61651a6ea6af0 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 27 Nov 2012 23:54:09 +0100
Subject: [PATCH] Prepare urllib references for 2/3 compatibility

---
 youtube_dl/FileDownloader.py |  15 +-
 youtube_dl/InfoExtractors.py | 397 +++++++++++++++++------------------
 youtube_dl/__init__.py       |  20 +-
 youtube_dl/utils.py          |  29 ++-
 4 files changed, 238 insertions(+), 223 deletions(-)

diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 411d01a41..89beaf453 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -9,7 +9,6 @@
 import subprocess
 import sys
 import time
-import urllib2
 
 if os.name == 'nt':
     import ctypes
@@ -461,7 +460,7 @@ def process_info(self, info_dict):
                 success = self._do_download(filename, info_dict)
             except (OSError, IOError) as err:
                 raise UnavailableVideoError
-            except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                 return
             except (ContentTooShortError, ) as err:
@@ -585,8 +584,8 @@ def _do_download(self, filename, info_dict):
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
-        basic_request = urllib2.Request(url, None, headers)
-        request = urllib2.Request(url, None, headers)
+        basic_request = compat_urllib_request.Request(url, None, headers)
+        request = compat_urllib_request.Request(url, None, headers)
 
         # Establish possible resume length
         if os.path.isfile(encodeFilename(tmpfilename)):
@@ -610,9 +609,9 @@ def _do_download(self, filename, info_dict):
             try:
                 if count == 0 and 'urlhandle' in info_dict:
                     data = info_dict['urlhandle']
-                data = urllib2.urlopen(request)
+                data = compat_urllib_request.urlopen(request)
                 break
-            except (urllib2.HTTPError, ) as err:
+            except (compat_urllib_error.HTTPError, ) as err:
                 if (err.code < 500 or err.code >= 600) and err.code != 416:
                     # Unexpected HTTP error
                     raise
@@ -620,9 +619,9 @@ def _do_download(self, filename, info_dict):
                     # Unable to resume (requested range not satisfiable)
                     try:
                         # Open the connection again without the range header
-                        data = urllib2.urlopen(basic_request)
+                        data = compat_urllib_request.urlopen(basic_request)
                         content_length = data.info()['Content-Length']
-                    except (urllib2.HTTPError, ) as err:
+                    except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
                             raise
                         else:
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index f07735360..28731b895 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -9,8 +9,6 @@
 import re
 import socket
 import time
-import urllib
-import urllib2
 import email.utils
 import xml.etree.ElementTree
 import random
@@ -53,7 +51,7 @@ class InfoExtractor(object):
     player_url:     SWF Player URL (used for rtmpdump).
     subtitles:      The .srt file contents.
     urlhandle:      [internal] The urlHandle to be used to download the file,
-                    like returned by urllib2.urlopen
+                    like returned by urllib.request.urlopen
 
     The fields should all be Unicode strings.
@@ -257,11 +255,11 @@ def _real_initialize(self):
             return
 
         # Set language
-        request = urllib2.Request(self._LANG_URL)
+        request = compat_urllib_request.Request(self._LANG_URL)
         try:
             self.report_lang()
-            urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
             return
@@ -277,14 +275,14 @@ def _real_initialize(self):
             'username': username,
             'password': password,
             }
-        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
+        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
         try:
             self.report_login()
-            login_results = urllib2.urlopen(request).read()
+            login_results = compat_urllib_request.urlopen(request).read()
             if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                 return
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
             return
@@ -293,11 +291,11 @@ def _real_initialize(self):
             'next_url': '/',
             'action_confirm': 'Confirm',
             }
-        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
+        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
         try:
             self.report_age_confirmation()
-            age_results = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            age_results = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
             return
@@ -305,7 +303,7 @@ def _real_extract(self, url):
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
-            url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+            url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
 
         # Extract video id from URL
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -316,10 +314,10 @@ def _real_extract(self, url):
 
         # Get video webpage
         self.report_video_webpage_download(video_id)
-        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
+        request = compat_urllib_request.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
         try:
-            video_webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            video_webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
             return
@@ -335,13 +333,13 @@ def _real_extract(self, url):
         for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
             video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                     % (video_id, el_type))
-            request = urllib2.Request(video_info_url)
+            request = compat_urllib_request.Request(video_info_url)
             try:
-                video_info_webpage = urllib2.urlopen(request).read()
+                video_info_webpage = compat_urllib_request.urlopen(request).read()
                 video_info = parse_qs(video_info_webpage)
                 if 'token' in video_info:
                     break
-            except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
                 return
         if 'token' not in video_info:
@@ -363,13 +361,13 @@ def _real_extract(self, url):
         if 'author' not in video_info:
             self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
             return
-        video_uploader = urllib.unquote_plus(video_info['author'][0])
+        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
 
         # title
         if 'title' not in video_info:
             self._downloader.trouble(u'ERROR: unable to extract video title')
             return
-        video_title = urllib.unquote_plus(video_info['title'][0])
+        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
         video_title = video_title.decode('utf-8')
 
         # thumbnail image
@@ -377,7 +375,7 @@ def _real_extract(self, url):
             self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
             video_thumbnail = ''
         else:   # don't panic if we can't find it
-            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
+            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
 
         # upload date
         upload_date = None
@@ -401,10 +399,10 @@ def _real_extract(self, url):
         if self._downloader.params.get('writesubtitles', False):
             try:
                 self.report_video_subtitles_download(video_id)
-                request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+                request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
                 try:
-                    srt_list = urllib2.urlopen(request).read()
-                except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+                    srt_list = compat_urllib_request.urlopen(request).read()
+                except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                     raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                 srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
                 srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
@@ -418,10 +416,10 @@ def _real_extract(self, url):
                     srt_lang = srt_lang_list.keys()[0]
                 if not srt_lang in srt_lang_list:
                     raise Trouble(u'WARNING: no closed captions found in the specified language')
-                request = urllib2.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+                request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
                 try:
-                    srt_xml = urllib2.urlopen(request).read()
-                except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+                    srt_xml = compat_urllib_request.urlopen(request).read()
+                except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                     raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                 if not srt_xml:
                     raise Trouble(u'WARNING: unable to download video subtitles')
@@ -433,10 +431,10 @@ def _real_extract(self, url):
             self._downloader.trouble(u'WARNING: unable to extract video duration')
             video_duration = ''
         else:
-            video_duration = urllib.unquote_plus(video_info['length_seconds'][0])
+            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
 
         # token
-        video_token = urllib.unquote_plus(video_info['token'][0])
+        video_token = compat_urllib_parse.unquote_plus(video_info['token'][0])
 
         # Decide which formats to download
         req_format = self._downloader.params.get('format', None)
@@ -539,11 +537,11 @@ def report_extraction(self, video_id):
 
     def _real_initialize(self):
         # Retrieve disclaimer
-        request = urllib2.Request(self._DISCLAIMER)
+        request = compat_urllib_request.Request(self._DISCLAIMER)
         try:
             self.report_disclaimer()
-            disclaimer = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            disclaimer = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
             return
@@ -552,11 +550,11 @@ def _real_initialize(self):
             'filters': '0',
             'submit': "Continue - I'm over 18",
             }
-        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
+        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
         try:
             self.report_age_confirmation()
-            disclaimer = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            disclaimer = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
             return
@@ -576,11 +574,11 @@ def _real_extract(self, url):
             return
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
+        request = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
             return
@@ -588,7 +586,7 @@ def _real_extract(self, url):
         self.report_extraction(video_id)
         mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
         if mobj is not None:
-            mediaURL = urllib.unquote(mobj.group(1))
+            mediaURL = compat_urllib_parse.unquote(mobj.group(1))
             video_extension = mediaURL[-3:]
 
             # Extract gdaKey if available
@@ -666,12 +664,12 @@ def _real_extract(self, url):
         video_extension = 'mp4'
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request(url)
+        request = compat_urllib_request.Request(url)
         request.add_header('Cookie', 'family_filter=off')
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
             return
@@ -681,7 +679,7 @@ def _real_extract(self, url):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract media URL')
             return
-        flashvars = urllib.unquote(mobj.group(1))
+        flashvars = compat_urllib_parse.unquote(mobj.group(1))
 
         for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
             if key in flashvars:
@@ -697,7 +695,7 @@
             self._downloader.trouble(u'ERROR: unable to extract video URL')
             return
-        video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/')
+        video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
 
         # TODO: support choosing qualities
@@ -763,11 +761,11 @@ def _real_extract(self, url):
         video_extension = 'mp4'
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
+        request = compat_urllib_request.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -780,7 +778,7 @@ def _real_extract(self, url):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract media URL')
             return
-        mediaURL = urllib.unquote(mobj.group(1))
+        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
         mediaURL = mediaURL.replace('\\x3d', '\x3d')
         mediaURL = mediaURL.replace('\\x26', '\x26')
@@ -803,10 +801,10 @@ def _real_extract(self, url):
 
         # Extract video thumbnail
         if self._downloader.params.get('forcethumbnail', False):
-            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
+            request = compat_urllib_request.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
             try:
-                webpage = urllib2.urlopen(request).read()
-            except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+                webpage = compat_urllib_request.urlopen(request).read()
+            except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                 return
             mobj = re.search(r'', webpage)
@@ -856,11 +854,11 @@ def _real_extract(self, url):
         video_extension = 'flv'
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request(url)
+        request = compat_urllib_request.Request(url)
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -870,7 +868,7 @@ def _real_extract(self, url):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract media URL')
             return
-        mediaURL = urllib.unquote(mobj.group(1))
+        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
 
         video_url = mediaURL
@@ -925,10 +923,10 @@ def _real_extract(self, url, new_video=True):
         # Rewrite valid but non-extractable URLs as
         # extractable English language /watch/ URLs
         if re.match(self._VPAGE_URL, url) is None:
-            request = urllib2.Request(url)
+            request = compat_urllib_request.Request(url)
             try:
-                webpage = urllib2.urlopen(request).read()
-            except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+                webpage = compat_urllib_request.urlopen(request).read()
+            except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                 return
@@ -948,11 +946,11 @@ def _real_extract(self, url, new_video=True):
             return self._real_extract(url, new_video=False)
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request(url)
+        request = compat_urllib_request.Request(url)
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -1004,13 +1002,13 @@ def _real_extract(self, url, new_video=True):
         # seem to need most of them, otherwise the server sends a 401.
         yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
         yv_bitrate = '700' # according to Wikipedia this is hard-coded
-        request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
+        request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
                 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
                 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -1019,7 +1017,7 @@ def _real_extract(self, url, new_video=True):
         if mobj is None:
             self._downloader.trouble(u'ERROR: Unable to extract media URL')
             return
-        video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
+        video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
         video_url = unescapeHTML(video_url)
 
         return [{
@@ -1062,11 +1060,11 @@ def _real_extract(self, url, new_video=True):
         video_id = mobj.group(1)
 
         # Retrieve video webpage to extract further information
-        request = urllib2.Request(url, None, std_headers)
+        request = compat_urllib_request.Request(url, None, std_headers)
         try:
             self.report_download_webpage(video_id)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -1168,11 +1166,11 @@ def report_extraction(self, video_id):
 
     def fetch_webpage(self, url):
         self._downloader.increment_downloads()
-        request = urllib2.Request(url)
+        request = compat_urllib_request.Request(url)
        try:
             self.report_download_webpage(url)
-            webpage = urllib2.urlopen(request).read()
-        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, httplib.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
         except ValueError as err:
@@ -1209,7 +1207,7 @@ def extractLiveStream(self, url):
             ]
         )
         http_host = url.split('/')[2]
-        next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
+        next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
         info = self.grep_webpage(
             next_url,
             r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
@@ -1234,7 +1232,7 @@ def extractPlus7Stream(self, url):
                 (1, 'url', u'ERROR: Invalid URL: %s' % url)
             ]
         )
-        next_url = urllib.unquote(info.get('url'))
+        next_url = compat_urllib_parse.unquote(info.get('url'))
         info = self.grep_webpage(
             next_url,
             r'
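
Note: the diffstat above shows youtube_dl/utils.py changing as well, which is where the compat_urllib_request, compat_urllib_error and compat_urllib_parse names used throughout this patch would be defined, but that hunk is not part of this excerpt. As a rough sketch only (the actual utils.py change may differ), such a shim is commonly written with import fallbacks:

    # Hypothetical compatibility shim; the names match the compat_* aliases
    # used in the diff above, but the real youtube_dl/utils.py hunk is not shown.
    try:
        import urllib.request as compat_urllib_request   # Python 3
    except ImportError:
        import urllib2 as compat_urllib_request          # Python 2

    try:
        import urllib.error as compat_urllib_error       # Python 3
    except ImportError:
        import urllib2 as compat_urllib_error            # Python 2

    try:
        import urllib.parse as compat_urllib_parse       # Python 3
    except ImportError:
        import urllib as compat_urllib_parse             # Python 2

With aliases like these defined once, the downloader and extractor code can build requests with compat_urllib_request.Request()/urlopen(), catch compat_urllib_error.URLError/HTTPError, and encode or unquote query data with compat_urllib_parse.urlencode()/unquote()/unquote_plus() identically on Python 2 and Python 3.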