Fix datasets converter (year partially broken)

This commit is contained in:
localhost_frssoft 2022-09-03 18:35:26 +03:00
parent e10b6da0e0
commit 6d30acedd0

View file

@ -16,6 +16,8 @@ def convert_tsv_to_db(title_basics_tsv):
with gzip.open(title_basics_tsv, mode='rt') as file:
write_dataset = []
counter = 0
chunk = 1000
progress_counter = 0
for line in file:
line = line.split("\t")
try:
@ -24,23 +26,23 @@ def convert_tsv_to_db(title_basics_tsv):
original_name = line[3]
ru_name = None
year = line[5]
if tt_type not in ("movie", "video"):
original_name = None
year = "\\N"
else:
print(tt_id, tt_type, original_name, ru_name, year)
if year == "\\N":
if year.startswith(r"\N"):
year = None
else:
year = int(year)
if tt_type not in ("movie", "video"):
original_name = None
year = None
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
counter += 1
if counter >= 1000:
if counter >= chunk:
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
write_dataset = []
counter = 0
progress_counter += chunk
print(f'Обработано: {progress_counter}')
except Exception as E:
print(E)
pass
@ -50,6 +52,7 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
with gzip.open(title_akas_tsv, mode='rt') as file:
ru_name_writer = []
counter = 0
for line in file:
line = line.split("\t")
try:
@ -62,8 +65,9 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
if tt_type not in ("movie", "video"):
continue
ru_name = line[2]
print(ru_name, tt_type)
ru_name_writer.append((ru_name, tt_id))
counter += 1
print(f'Обработано ru_name: {counter}')
except Exception as E:
print(E)