mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-22 08:46:43 +00:00
Randomize archive order before populating search tree
This doesn't result in an elegant, perfectly balanced search tree, but it's absolutely good enough. This commit completely mitigates the worst-case scenario where the archive file is sorted.

Signed-off-by: Jody Bruchon <jody@jodybruchon.com>
This commit is contained in:
parent
1d74d8d9f6
commit
fda63a4e87
|
@ -122,17 +122,14 @@ def __init__(self, line):
|
||||||
|
|
||||||
# Tree insertion
|
# Tree insertion
|
||||||
def at_insert(self, line):
|
def at_insert(self, line):
|
||||||
# print("at_insert: ", line)
|
|
||||||
cur = self
|
cur = self
|
||||||
while True:
|
while True:
|
||||||
# print("comparing ", line, cur.line)
|
|
||||||
if cur.line:
|
if cur.line:
|
||||||
if line < cur.line:
|
if line < cur.line:
|
||||||
if cur.left is None:
|
if cur.left is None:
|
||||||
cur.left = ArchiveTree(line)
|
cur.left = ArchiveTree(line)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# print("LEFT")
|
|
||||||
cur = cur.left
|
cur = cur.left
|
||||||
continue
|
continue
|
||||||
elif line > cur.line:
|
elif line > cur.line:
|
||||||
|
@ -140,7 +137,6 @@ def at_insert(self, line):
|
||||||
cur.right = ArchiveTree(line)
|
cur.right = ArchiveTree(line)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# print("RIGHT")
|
|
||||||
cur = cur.right
|
cur = cur.right
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
@ -426,43 +422,24 @@ def preload_download_archive(self):
|
||||||
if ioe.errno != errno.ENOENT:
|
if ioe.errno != errno.ENOENT:
|
||||||
raise
|
raise
|
||||||
lmax = len(lines)
|
lmax = len(lines)
|
||||||
if lmax >= 4:
|
if lmax > 10:
|
||||||
# Populate binary search tree by splitting the archive list in half
|
# Populate binary search tree by splitting the archive list in half
|
||||||
# and then adding from the outside edges inward
|
# and then adding from the outside edges inward
|
||||||
# This mitigates the worst case where the archive has been sorted
|
# This mitigates the worst case where the archive has been sorted
|
||||||
ptrLL = 0
|
pos = 0
|
||||||
ptrLR = lmax // 2
|
while pos < lmax:
|
||||||
ptrRL = ptrLR + 1
|
if lmax - pos <= 2:
|
||||||
ptrRR = lmax - 1
|
|
||||||
inserted = 0
|
|
||||||
while True:
|
|
||||||
# print("ptrs: %d %d %d %d" % (ptrLL, ptrLR, ptrRL, ptrRR))
|
|
||||||
if ptrLR > ptrLL:
|
|
||||||
self.archive.at_insert(lines[ptrLR])
|
|
||||||
inserted += 1
|
|
||||||
ptrLR -= 1;
|
|
||||||
if ptrRL < ptrRR:
|
|
||||||
self.archive.at_insert(lines[ptrRL])
|
|
||||||
inserted += 1
|
|
||||||
ptrRL += 1;
|
|
||||||
if ptrLL < ptrLR:
|
|
||||||
self.archive.at_insert(lines[ptrLL])
|
|
||||||
inserted += 1
|
|
||||||
ptrLL += 1;
|
|
||||||
if ptrRR > ptrRL:
|
|
||||||
self.archive.at_insert(lines[ptrRR])
|
|
||||||
inserted += 1
|
|
||||||
ptrRR -= 1;
|
|
||||||
if ptrLL == ptrLR and ptrRL == ptrRR:
|
|
||||||
print("inserted: %d, lmax: %d" % (inserted, lmax))
|
|
||||||
break
|
break
|
||||||
elif lmax > 0:
|
target = random.randrange(pos + 1, lmax - 1)
|
||||||
# Skip multi-line logic for a single line
|
temp = lines[pos]
|
||||||
for idx in lines:
|
lines[pos] = lines[target]
|
||||||
self.archive.at_insert(idx)
|
lines[target] = lines[pos]
|
||||||
else:
|
pos += 1
|
||||||
|
elif lmax < 1:
|
||||||
# No lines were loaded
|
# No lines were loaded
|
||||||
return False
|
return False
|
||||||
|
for x in lines:
|
||||||
|
self.archive.at_insert(x)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def check_deprecated(param, option, suggestion):
|
def check_deprecated(param, option, suggestion):
|
||||||
|
|
Loading…
Reference in a new issue