Faster conversion of datasets to sqlite

This commit is contained in:
localhost_frssoft 2022-09-03 01:51:59 +03:00
parent cc2993e59c
commit 09b31528f9

View file

@ -12,8 +12,10 @@ conn.commit()
def convert_tsv_to_db(title_basics_tsv):
'''Конвертирование основного датасета в sqlite базу, выполняется весьма долго (5-10 минут)'''
'''Конвертирование основного датасета в sqlite базу, выполняется долго (~5 минут)'''
with gzip.open(title_basics_tsv, mode='rt') as file:
write_dataset = []
counter = 0
for line in file:
line = line.split("\t")
try:
@ -33,16 +35,20 @@ def convert_tsv_to_db(title_basics_tsv):
year = None
else:
year = int(year)
c.execute("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)",
(tt_id, tt_type, original_name, ru_name, year))
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
counter += 1
if counter >= 1000:
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
write_dataset = []
except Exception as E:
print(E)
pass
conn.commit()
conn.commit()
def extract_ru_locale_from_tsv(title_akas_tsv):
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
with gzip.open(title_akas_tsv, mode='rt') as file:
ru_name_writer = []
for line in file:
line = line.split("\t")
try:
@ -56,11 +62,13 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
continue
ru_name = line[2]
print(ru_name, tt_type)
c.execute("UPDATE titles SET ru_name = ? WHERE tt_id = ?", (ru_name, tt_id))
ru_name_writer.append((ru_name, tt_id))
except Exception as E:
print(E)
pass
c.executemany("UPDATE titles SET ru_name = ? WHERE tt_id = ?", ru_name_writer)
conn.commit()
def convert_datasets_to_db():