Faster conversion of datasets to SQLite

This commit is contained in:
localhost_frssoft 2022-09-03 01:51:59 +03:00
parent cc2993e59c
commit 09b31528f9

View file

@ -12,8 +12,10 @@ conn.commit()
def convert_tsv_to_db(title_basics_tsv): def convert_tsv_to_db(title_basics_tsv):
'''Конвертирование основного датасета в sqlite базу, выполняется весьма долго (5-10 минут)''' '''Конвертирование основного датасета в sqlite базу, выполняется долго (~5 минут)'''
with gzip.open(title_basics_tsv, mode='rt') as file: with gzip.open(title_basics_tsv, mode='rt') as file:
write_dataset = []
counter = 0
for line in file: for line in file:
line = line.split("\t") line = line.split("\t")
try: try:
@ -33,8 +35,11 @@ def convert_tsv_to_db(title_basics_tsv):
year = None year = None
else: else:
year = int(year) year = int(year)
c.execute("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
(tt_id, tt_type, original_name, ru_name, year)) counter += 1
if counter >= 1000:
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
write_dataset = []
except Exception as E: except Exception as E:
print(E) print(E)
pass pass
@ -43,6 +48,7 @@ def convert_tsv_to_db(title_basics_tsv):
def extract_ru_locale_from_tsv(title_akas_tsv): def extract_ru_locale_from_tsv(title_akas_tsv):
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу''' '''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
with gzip.open(title_akas_tsv, mode='rt') as file: with gzip.open(title_akas_tsv, mode='rt') as file:
ru_name_writer = []
for line in file: for line in file:
line = line.split("\t") line = line.split("\t")
try: try:
@ -56,11 +62,13 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
continue continue
ru_name = line[2] ru_name = line[2]
print(ru_name, tt_type) print(ru_name, tt_type)
c.execute("UPDATE titles SET ru_name = ? WHERE tt_id = ?", (ru_name, tt_id)) ru_name_writer.append((ru_name, tt_id))
except Exception as E: except Exception as E:
print(E) print(E)
pass pass
c.executemany("UPDATE titles SET ru_name = ? WHERE tt_id = ?", ru_name_writer)
conn.commit() conn.commit()
def convert_datasets_to_db(): def convert_datasets_to_db():