mirror of
https://gitea.phreedom.club/localhost_frssoft/FMN_bot.git
synced 2024-11-25 18:31:28 +00:00
Fix datasets converter (year partially broken)
This commit is contained in:
parent
e10b6da0e0
commit
6d30acedd0
|
@ -16,6 +16,8 @@ def convert_tsv_to_db(title_basics_tsv):
|
|||
with gzip.open(title_basics_tsv, mode='rt') as file:
|
||||
write_dataset = []
|
||||
counter = 0
|
||||
chunk = 1000
|
||||
progress_counter = 0
|
||||
for line in file:
|
||||
line = line.split("\t")
|
||||
try:
|
||||
|
@ -24,23 +26,23 @@ def convert_tsv_to_db(title_basics_tsv):
|
|||
original_name = line[3]
|
||||
ru_name = None
|
||||
year = line[5]
|
||||
|
||||
if tt_type not in ("movie", "video"):
|
||||
original_name = None
|
||||
year = "\\N"
|
||||
else:
|
||||
print(tt_id, tt_type, original_name, ru_name, year)
|
||||
|
||||
if year == "\\N":
|
||||
if year.startswith(r"\N"):
|
||||
year = None
|
||||
else:
|
||||
year = int(year)
|
||||
|
||||
if tt_type not in ("movie", "video"):
|
||||
original_name = None
|
||||
year = None
|
||||
|
||||
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
|
||||
counter += 1
|
||||
if counter >= 1000:
|
||||
if counter >= chunk:
|
||||
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
|
||||
write_dataset = []
|
||||
counter = 0
|
||||
progress_counter += chunk
|
||||
print(f'Обработано: {progress_counter}')
|
||||
except Exception as E:
|
||||
print(E)
|
||||
pass
|
||||
|
@ -50,6 +52,7 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
|||
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
||||
with gzip.open(title_akas_tsv, mode='rt') as file:
|
||||
ru_name_writer = []
|
||||
counter = 0
|
||||
for line in file:
|
||||
line = line.split("\t")
|
||||
try:
|
||||
|
@ -62,8 +65,9 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
|||
if tt_type not in ("movie", "video"):
|
||||
continue
|
||||
ru_name = line[2]
|
||||
print(ru_name, tt_type)
|
||||
ru_name_writer.append((ru_name, tt_id))
|
||||
counter += 1
|
||||
print(f'Обработано ru_name: {counter}')
|
||||
|
||||
except Exception as E:
|
||||
print(E)
|
||||
|
|
Loading…
Reference in a new issue