From aa3e950764337ef9800c936f4de89b31c00dfcf5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 11:57:13 +0200 Subject: [PATCH 1/2] Tolerate junk at the end of gzip-compressed content (#1268) --- youtube_dl/utils.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e6fa634a7..be788cf5a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -628,8 +628,23 @@ def http_response(self, req, resp): old_resp = resp # gzip if resp.headers.get('Content-encoding', '') == 'gzip': - gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r') - resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) + content = resp.read() + gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb') + try: + uncompressed = io.BytesIO(gz.read()) + except IOError as original_ioerror: + # There may be junk add the end of the file + # See http://stackoverflow.com/q/4928560/35070 for details + for i in range(1, 1024): + try: + gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb') + uncompressed = io.BytesIO(gz.read()) + except IOError: + continue + break + else: + raise original_ioerror + resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg # deflate if resp.headers.get('Content-encoding', '') == 'deflate': From ae3531adf926998d42d1fb52453491c85e33b5f0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 12:04:44 +0200 Subject: [PATCH 2/2] [generic] Fix URL concatenation When the url is something like http://example.org/foo/bar?x=y and the added is file/video.mp4 , we want http://example.org/foo/file/video.mp4 Fixes #1268. --- youtube_dl/extractor/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d034a11bb..bfc9bff49 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -166,7 +166,12 @@ def _real_extract(self, url): if video_url.startswith('//'): video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url if '://' not in video_url: - video_url = url + ('' if url.endswith('/') else '/') + video_url + up = compat_urllib_parse_urlparse(url) + if video_url.startswith('/'): + video_url = up.scheme + '://' + up.netloc + video_url + else: # relative path + video_url = (up.scheme + '://' + up.netloc + + up.path.rpartition('/')[0] + '/' + video_url) video_id = os.path.basename(video_url) # here's a fun little line of code for you: