From 60bf187f58a9aae0f6b2fd8fc851a7239fad6b82 Mon Sep 17 00:00:00 2001
From: Inex Code
Date: Mon, 23 Dec 2024 20:42:16 +0300
Subject: [PATCH] feat: PostgreSQL backups and restore

---
 nixos/module.nix                              |  2 +
 selfprivacy_api/backup/__init__.py            |  1 +
 selfprivacy_api/backup/postgres.py            | 55 +++++++++++++
 selfprivacy_api/services/service.py           |  6 ++
 selfprivacy_api/services/templated_service.py | 79 ++++++++++++++++++-
 5 files changed, 141 insertions(+), 2 deletions(-)
 create mode 100644 selfprivacy_api/backup/postgres.py

diff --git a/nixos/module.nix b/nixos/module.nix
index e7c38d2..d12486e 100644
--- a/nixos/module.nix
+++ b/nixos/module.nix
@@ -65,6 +65,7 @@ in
       wants = [ "network-online.target" ];
       wantedBy = [ "multi-user.target" ];
       serviceConfig = {
+        # Do not forget to edit Postgres identMap if you change the user!
         User = "root";
         ExecStart = "${selfprivacy-graphql-api}/bin/app.py";
         Restart = "always";
@@ -101,6 +102,7 @@ in
       wants = [ "network-online.target" ];
       wantedBy = [ "multi-user.target" ];
       serviceConfig = {
+        # Do not forget to edit Postgres identMap if you change the user!
         User = "root";
         ExecStart = "${pkgs.python312Packages.huey}/bin/huey_consumer.py selfprivacy_api.task_registry.huey";
         Restart = "always";
diff --git a/selfprivacy_api/backup/__init__.py b/selfprivacy_api/backup/__init__.py
index 4af7c19..f4b3b80 100644
--- a/selfprivacy_api/backup/__init__.py
+++ b/selfprivacy_api/backup/__init__.py
@@ -452,6 +452,7 @@ class Backups:
         with StoppedService(service):
             if not service.is_always_active():
                 Backups.assert_dead(service)
+            service.pre_restore()
             if strategy == RestoreStrategy.INPLACE:
                 Backups._inplace_restore(service, snapshot, job)
             else:  # verify_before_download is our default
diff --git a/selfprivacy_api/backup/postgres.py b/selfprivacy_api/backup/postgres.py
new file mode 100644
index 0000000..0fee12a
--- /dev/null
+++ b/selfprivacy_api/backup/postgres.py
@@ -0,0 +1,55 @@
+import subprocess
+
+
+class PostgresDumper:
+    """Dump and restore a single database over the local UNIX socket,
+    e.g. --dbname=postgresql://postgres@%2Frun%2Fpostgresql/pleroma"""
+
+    def __init__(self, db_name: str):
+        self.db_name = db_name
+        self.user = "postgres"
+        self.socket_dir = r"%2Frun%2Fpostgresql"
+
+    def backup_database(self, backup_file: str):
+        # Create a plain-format SQL dump and pipe it to gzip.
+        # --clean/--if-exists make the script drop existing objects
+        # first, so it can be replayed over a live database on restore.
+        dump_command = [
+            "pg_dump",
+            "--clean",
+            "--if-exists",
+            f"--dbname=postgresql://{self.user}@{self.socket_dir}/{self.db_name}",
+        ]
+        gzip_command = ["gzip", "--rsyncable"]
+
+        with open(backup_file, "wb") as f_out:
+            dump_process = subprocess.Popen(dump_command, stdout=subprocess.PIPE)
+            gzip_process = subprocess.Popen(
+                gzip_command, stdin=dump_process.stdout, stdout=f_out
+            )
+            dump_process.stdout.close()  # Allow dump_process to receive a SIGPIPE if gzip_process exits
+            gzip_process.communicate()
+            if dump_process.wait() != 0 or gzip_process.returncode != 0:
+                raise subprocess.SubprocessError("pg_dump | gzip pipeline failed")
+
+        return backup_file
+
+    def restore_database(self, backup_file: str):
+        # Decompress the dump; gunzip removes the .gz and leaves the .sql
+        gunzip_command = ["gunzip", backup_file]
+        subprocess.run(gunzip_command, check=True)
+
+        # The dump is a plain-format SQL script, so replay it with psql;
+        # pg_restore only reads custom, directory or tar archives.
+        dump_file = backup_file.removesuffix(".gz")
+        restore_command = [
+            "psql",
+            "--dbname=postgresql://{}@{}/{}".format(
+                self.user, self.socket_dir, self.db_name
+            ),
+            "--set",
+            "ON_ERROR_STOP=1",
+            "--file",
+            dump_file,
+        ]
+        subprocess.run(restore_command, check=True)
diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py
index 4d9c54d..b7097d2 100644
--- a/selfprivacy_api/services/service.py
+++ b/selfprivacy_api/services/service.py
@@ -328,6 +328,9 @@ class Service(ABC):
     def get_foldername(path: str) -> str:
        return path.split("/")[-1]

+    def get_postgresql_databases(self) -> List[str]:
+        return []
+
     # TODO: with better json utils, it can be one line, and not a separate function
     @classmethod
     def set_location(cls, volume: BlockDevice):
@@ -481,6 +484,9 @@ class Service(ABC):
     def post_backup(self):
         pass

+    def pre_restore(self):
+        pass
+
     def post_restore(self):
         pass

diff --git a/selfprivacy_api/services/templated_service.py b/selfprivacy_api/services/templated_service.py
index a6e65bd..d9ef76a 100644
--- a/selfprivacy_api/services/templated_service.py
+++ b/selfprivacy_api/services/templated_service.py
@@ -6,12 +6,16 @@
 import logging
 import json
 import subprocess
 from typing import List, Optional
-from os import path
 from os.path import join, exists
+from os import mkdir
+from shutil import rmtree

 from pydantic import BaseModel, ConfigDict
 from pydantic.alias_generators import to_camel
+from selfprivacy_api.backup.jobs import get_backup_job
+from selfprivacy_api.backup.postgres import PostgresDumper
+from selfprivacy_api.jobs import JobStatus, Jobs
 from selfprivacy_api.models.services import ServiceDnsRecord, ServiceStatus
 from selfprivacy_api.services.flake_service_manager import FlakeServiceManager
 from selfprivacy_api.services.generic_size_counter import get_storage_usage
@@ -153,7 +157,7 @@
             self.definition_data = json.loads(source_data)
         else:
             # Check if the service exists
-            if not path.exists(join(SP_MODULES_DEFENITIONS_PATH, service_id)):
+            if not exists(join(SP_MODULES_DEFENITIONS_PATH, service_id)):
                 raise FileNotFoundError(f"Service {service_id} not found")
             # Load the service
             with open(join(SP_MODULES_DEFENITIONS_PATH, service_id)) as file:
@@ -401,6 +405,10 @@
         else:
             return root_device

+    def _get_db_dumps_folder(self) -> str:
+        # Get the drive where the service is located and append the folder name
+        return join("/", "volumes", self.get_drive(), f"db_dumps_{self.get_id()}")
+
     def get_folders(self) -> List[str]:
         folders = self.meta.folders
         owned_folders = self.meta.owned_folders
@@ -408,6 +416,8 @@
         resulting_folders = folders.copy()
         for folder in owned_folders:
             resulting_folders.append(folder.path)
+        if self.get_postgresql_databases():
+            resulting_folders.append(self._get_db_dumps_folder())
         return resulting_folders

     def get_owned_folders(self) -> List[OwnedPath]:
@@ -416,6 +426,14 @@
         resulting_folders = owned_folders.copy()
         for folder in folders:
             resulting_folders.append(self.owned_path(folder))
+        if self.get_postgresql_databases():
+            resulting_folders.append(
+                OwnedPath(
+                    path=self._get_db_dumps_folder(),
+                    owner="selfprivacy-api",
+                    group="selfprivacy-api",
+                )
+            )
         return resulting_folders

     def set_location(self, volume: BlockDevice):
@@ -431,6 +449,9 @@
             user_data["modules"][service_id] = {}
         user_data["modules"][service_id]["location"] = volume.name

+    def get_postgresql_databases(self) -> List[str]:
+        return self.meta.postgresql_databases
+
     def owned_path(self, path: str):
         """Default folder ownership"""
         service_name = self.get_display_name()
@@ -454,3 +475,57 @@
             owner=owner,
             group=group,
         )
+
+    def pre_backup(self):
+        if self.get_postgresql_databases():
+            job = get_backup_job(self)
+            # Create the folder for the database dumps
+            db_dumps_folder = self._get_db_dumps_folder()
+            if not exists(db_dumps_folder):
+                mkdir(db_dumps_folder)
+            # Dump the databases
+            for db_name in self.get_postgresql_databases():
+                if job is not None:
+                    Jobs.update(
+                        job,
+                        status_text=f"Creating a dump of database {db_name}",
+                        status=JobStatus.RUNNING,
+                    )
+                db_dumper = PostgresDumper(db_name)
+                backup_file = join(db_dumps_folder, f"{db_name}.sql.gz")
+                db_dumper.backup_database(backup_file)
+
+    def post_backup(self):
+        if self.get_postgresql_databases():
+            # Remove the folder with the database dumps; use rmtree,
+            # since rmdir only removes empty directories
+            db_dumps_folder = self._get_db_dumps_folder()
+            if exists(db_dumps_folder):
+                rmtree(db_dumps_folder)
+
+    def pre_restore(self):
+        if self.get_postgresql_databases():
+            # Create the folder for the database dumps
+            db_dumps_folder = self._get_db_dumps_folder()
+            if not exists(db_dumps_folder):
+                mkdir(db_dumps_folder)
+
+    def post_restore(self):
+        if self.get_postgresql_databases():
+            job = get_backup_job(self)
+            # Recover the databases
+            db_dumps_folder = self._get_db_dumps_folder()
+            for db_name in self.get_postgresql_databases():
+                if exists(join(db_dumps_folder, f"{db_name}.sql.gz")):
+                    if job is not None:
+                        Jobs.update(
+                            job,
+                            status_text=f"Restoring database {db_name}",
+                            status=JobStatus.RUNNING,
+                        )
+                    db_dumper = PostgresDumper(db_name)
+                    backup_file = join(db_dumps_folder, f"{db_name}.sql.gz")
+                    db_dumper.restore_database(backup_file)
+                else:
+                    logger.error(f"Database dump for {db_name} not found")
+                    raise FileNotFoundError(f"Database dump for {db_name} not found")
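
A minimal usage sketch (not part of the patch) of how the pre_backup() and
post_restore() hooks above drive PostgresDumper. The database name "pleroma"
is taken from the docstring example; the drive "sda1" and the dump path are
hypothetical, since the real paths are built by _get_db_dumps_folder().

    from selfprivacy_api.backup.postgres import PostgresDumper

    dumper = PostgresDumper("pleroma")  # hypothetical database name

    # what pre_backup() does per database: write a gzipped SQL dump
    # into the service's db_dumps folder
    dump = dumper.backup_database("/volumes/sda1/db_dumps_pleroma/pleroma.sql.gz")

    # what post_restore() does per database: decompress the dump and
    # replay it into the database over the local UNIX socket
    dumper.restore_database(dump)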