[http] Reject broken range before request (#3079)

* Also fix the file size estimate for byte-range downloads

Closes #2001
Authored by: Lesmiscore, Jules-A, pukkandan
This commit is contained in:
Lesmiscore (Naoya Ozaki) 2022-03-19 10:15:01 +09:00 committed by GitHub
parent e6552207da
commit be4685ab7b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -18,6 +18,7 @@
parse_http_range, parse_http_range,
sanitized_Request, sanitized_Request,
ThrottledDownload, ThrottledDownload,
try_get,
write_xattr, write_xattr,
XAttrMetadataError, XAttrMetadataError,
XAttrUnavailableError, XAttrUnavailableError,
@ -55,7 +56,6 @@ class DownloadContext(dict):
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
ctx.resume_len = 0 ctx.resume_len = 0
ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024) ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time() ctx.start_time = time.time()
ctx.chunk_size = None ctx.chunk_size = None
@ -102,6 +102,8 @@ def establish_connection():
if ctx.is_resume: if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len) self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab' ctx.open_mode = 'ab'
elif req_start is not None:
range_start = req_start
elif ctx.chunk_size > 0: elif ctx.chunk_size > 0:
range_start = 0 range_start = 0
else: else:
@ -118,11 +120,16 @@ def establish_connection():
else: else:
range_end = None range_end = None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len: if try_get(None, lambda _: range_start > range_end):
range_end = ctx.data_len - 1 ctx.resume_len = 0
has_range = range_start is not None ctx.open_mode = 'wb'
ctx.has_range = has_range raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
if try_get(None, lambda _: range_end >= ctx.content_len):
range_end = ctx.content_len - 1
request = sanitized_Request(url, request_data, headers) request = sanitized_Request(url, request_data, headers)
has_range = range_start is not None
if has_range: if has_range:
set_range(request, range_start, range_end) set_range(request, range_start, range_end)
# Establish connection # Establish connection
@ -146,7 +153,8 @@ def establish_connection():
or content_range_end == range_end or content_range_end == range_end
or content_len < range_end) or content_len < range_end)
if accept_content_len: if accept_content_len:
ctx.data_len = content_len ctx.content_len = content_len
ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
return return
# Content-Range is either not present or invalid. Assuming remote webserver is # Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file # trying to send the whole file, resume is not possible, so wiping the local file
@ -154,8 +162,7 @@ def establish_connection():
self.report_unable_to_resume() self.report_unable_to_resume()
ctx.resume_len = 0 ctx.resume_len = 0
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
return
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if err.code == 416: if err.code == 416:
# Unable to resume (requested range not satisfiable) # Unable to resume (requested range not satisfiable)
@ -331,7 +338,7 @@ def retry(e):
elif speed: elif speed:
throttle_start = None throttle_start = None
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
# ctx.block_size = block_size # ctx.block_size = block_size
raise NextFragment() raise NextFragment()