mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
Merge pull request #125 from jbruchon/master
Keep download archive in memory for better performance
This commit is contained in:
commit
c71700dbe4
|
@ -114,6 +114,54 @@
|
|||
import ctypes
|
||||
|
||||
|
||||
class ArchiveTree(object):
|
||||
"""Binary search tree for download archive entries"""
|
||||
def __init__(self, line):
|
||||
self.left = None
|
||||
self.right = None
|
||||
self.line = line
|
||||
|
||||
# Tree insertion
|
||||
def at_insert(self, line):
|
||||
cur = self
|
||||
while True:
|
||||
if cur.line:
|
||||
if line < cur.line:
|
||||
if cur.left is None:
|
||||
cur.left = ArchiveTree(line)
|
||||
return
|
||||
else:
|
||||
cur = cur.left
|
||||
continue
|
||||
elif line > cur.line:
|
||||
if cur.right is None:
|
||||
cur.right = ArchiveTree(line)
|
||||
return
|
||||
else:
|
||||
cur = cur.right
|
||||
continue
|
||||
else:
|
||||
# Duplicate line found
|
||||
return
|
||||
else:
|
||||
cur.line = line
|
||||
return
|
||||
|
||||
def at_exist(self, line):
|
||||
if self.line is None:
|
||||
return False
|
||||
if line < self.line:
|
||||
if self.left is None:
|
||||
return False
|
||||
return self.left.at_exist(line)
|
||||
elif line > self.line:
|
||||
if self.right is None:
|
||||
return False
|
||||
return self.right.at_exist(line)
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
class YoutubeDL(object):
|
||||
"""YoutubeDL class.
|
||||
|
||||
|
@ -359,6 +407,39 @@ def __init__(self, params=None, auto_init=True):
|
|||
}
|
||||
self.params.update(params)
|
||||
self.cache = Cache(self)
|
||||
self.archive = ArchiveTree(None)
|
||||
|
||||
"""Preload the archive, if any is specified"""
|
||||
def preload_download_archive(self):
|
||||
lines = []
|
||||
fn = self.params.get('download_archive')
|
||||
if fn is None:
|
||||
return False
|
||||
try:
|
||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||
for line in archive_file:
|
||||
lines.append(line.strip())
|
||||
except IOError as ioe:
|
||||
if ioe.errno != errno.ENOENT:
|
||||
raise
|
||||
lmax = len(lines)
|
||||
if lmax > 10:
|
||||
pos = 0
|
||||
while pos < lmax:
|
||||
if lmax - pos <= 2:
|
||||
break
|
||||
target = random.randrange(pos + 1, lmax - 1)
|
||||
# Swap line at pos with randomly chosen target
|
||||
temp = lines[pos]
|
||||
lines[pos] = lines[target]
|
||||
lines[target] = temp
|
||||
pos += 1
|
||||
elif lmax < 1:
|
||||
# No lines were loaded
|
||||
return False
|
||||
for x in lines:
|
||||
self.archive.at_insert(x)
|
||||
return True
|
||||
|
||||
def check_deprecated(param, option, suggestion):
|
||||
if self.params.get(param) is not None:
|
||||
|
@ -367,6 +448,11 @@ def check_deprecated(param, option, suggestion):
|
|||
return True
|
||||
return False
|
||||
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
|
||||
|
||||
preload_download_archive(self)
|
||||
|
||||
if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
|
||||
if self.params.get('geo_verification_proxy') is None:
|
||||
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||
|
@ -722,7 +808,7 @@ def prepare_filename(self, info_dict):
|
|||
return None
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
""" Returns None if the file should be downloaded """
|
||||
|
||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||
if 'title' in info_dict:
|
||||
|
@ -2142,15 +2228,7 @@ def in_download_archive(self, info_dict):
|
|||
if not vid_id:
|
||||
return False # Incomplete video information
|
||||
|
||||
try:
|
||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||
for line in archive_file:
|
||||
if line.strip() == vid_id:
|
||||
return True
|
||||
except IOError as ioe:
|
||||
if ioe.errno != errno.ENOENT:
|
||||
raise
|
||||
return False
|
||||
return self.archive.at_exist(vid_id)
|
||||
|
||||
def record_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
|
@ -2160,6 +2238,7 @@ def record_download_archive(self, info_dict):
|
|||
assert vid_id
|
||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||
archive_file.write(vid_id + '\n')
|
||||
self.archive.at_insert(vid_id)
|
||||
|
||||
@staticmethod
|
||||
def format_resolution(format, default='unknown'):
|
||||
|
|
Loading…
Reference in a new issue