feat: PostgreSQL backups and restore

Inex Code 2024-12-23 20:42:16 +03:00
parent 89278cffd7
commit 60bf187f58
5 changed files with 130 additions and 2 deletions

View file

@@ -65,6 +65,7 @@ in
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        # Do not forget to edit Postgres identMap if you change the user!
        User = "root";
        ExecStart = "${selfprivacy-graphql-api}/bin/app.py";
        Restart = "always";
@@ -101,6 +102,7 @@ in
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        # Do not forget to edit Postgres identMap if you change the user!
        User = "root";
        ExecStart = "${pkgs.python312Packages.huey}/bin/huey_consumer.py selfprivacy_api.task_registry.huey";
        Restart = "always";

View file

@@ -452,6 +452,7 @@ class Backups:
        with StoppedService(service):
            if not service.is_always_active():
                Backups.assert_dead(service)
            service.pre_restore()
            if strategy == RestoreStrategy.INPLACE:
                Backups._inplace_restore(service, snapshot, job)
            else:  # verify_before_download is our default
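The effect is that a service's pre_restore hook now runs while the service is stopped, before any snapshot data is moved back into place. A minimal sketch of the resulting ordering, where download_snapshot is a hypothetical stand-in for the strategy-specific restore logic in Backups:

# Sketch of the restore flow around the new hook (not the real method);
# download_snapshot stands in for the actual restic-backed restore step.
def restore_flow_sketch(service, snapshot):
    with StoppedService(service):    # service is stopped for the duration
        service.pre_restore()        # e.g. recreate the db dumps folder
        download_snapshot(snapshot)  # replace folders from the snapshot
        service.post_restore()       # e.g. replay the SQL dumps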

View file

@@ -0,0 +1,46 @@
import subprocess


class PostgresDumper:
    """Dump and restore a single PostgreSQL database over the local unix
    socket, e.g. --dbname=postgresql://postgres@%2Frun%2Fpostgresql/pleroma"""

    def __init__(self, db_name: str):
        self.db_name = db_name
        self.user = "postgres"
        # URL-encoded path of the unix socket directory, /run/postgresql
        self.socket_dir = r"%2Frun%2Fpostgresql"

    def backup_database(self, backup_file: str):
        # Create the database dump and pipe it to gzip
        dump_command = [
            "pg_dump",
            # Use the custom archive format: pg_restore cannot read the
            # default plain-text format
            "--format=custom",
            f"--dbname=postgresql://{self.user}@{self.socket_dir}/{self.db_name}",
        ]
        gzip_command = ["gzip", "--rsyncable"]
        with open(backup_file, "wb") as f_out:
            dump_process = subprocess.Popen(dump_command, stdout=subprocess.PIPE)
            gzip_process = subprocess.Popen(
                gzip_command, stdin=dump_process.stdout, stdout=f_out
            )
            # Allow dump_process to receive a SIGPIPE if gzip_process exits
            dump_process.stdout.close()
            gzip_process.communicate()
        return backup_file

    def restore_database(self, backup_file: str):
        # Decompress the backup file; gunzip removes the .gz file and
        # leaves the decompressed dump next to it
        gunzip_command = ["gunzip", backup_file]
        subprocess.run(gunzip_command, check=True)
        # Restore from the decompressed dump. With --create, pg_restore
        # connects to the maintenance database first, then drops (--clean)
        # and recreates the target database recorded in the dump.
        dump_file = backup_file.removesuffix(".gz")
        restore_command = [
            "pg_restore",
            f"--dbname=postgresql://{self.user}@{self.socket_dir}/postgres",
            "--clean",
            "--create",
            dump_file,
        ]
        subprocess.run(restore_command, check=True)
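A usage sketch, assuming PostgreSQL accepts peer authentication on the local unix socket and a pleroma database exists (the dump path is a hypothetical example):

from selfprivacy_api.backup.postgres import PostgresDumper

dumper = PostgresDumper("pleroma")
# Dump; gzip --rsyncable keeps the compressed file friendly to
# deduplicating backup tools.
dumper.backup_database("/volumes/sda1/db_dumps_pleroma/pleroma.sql.gz")
# Restore; decompresses the file in place, then feeds it to pg_restore.
dumper.restore_database("/volumes/sda1/db_dumps_pleroma/pleroma.sql.gz")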

View file

@@ -328,6 +328,9 @@ class Service(ABC):
    def get_foldername(path: str) -> str:
        return path.split("/")[-1]

    def get_postgresql_databases(self) -> List[str]:
        return []

    # TODO: with better json utils, it can be one line, and not a separate function
    @classmethod
    def set_location(cls, volume: BlockDevice):
@@ -481,6 +484,9 @@ class Service(ABC):
    def post_backup(self):
        pass

    def pre_restore(self):
        pass

    def post_restore(self):
        pass
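With these base-class defaults, a service has no databases and the hooks are no-ops unless a subclass says otherwise. A minimal hypothetical subclass that declares one (the hook implementations that consume this live in TemplatedService, shown in the next file):

class HypotheticalService(Service):  # illustrative sketch, not a real service
    def get_postgresql_databases(self) -> List[str]:
        # Declare the databases that should be dumped before a backup
        # and replayed after a restore.
        return ["hypothetical_db"]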

View file

@@ -6,12 +6,15 @@ import logging
import json
import subprocess
from typing import List, Optional
from os import path
from os.path import join, exists
from os import mkdir, remove, rmdir
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel
from selfprivacy_api.backup.jobs import get_backup_job
from selfprivacy_api.backup.postgres import PostgresDumper
from selfprivacy_api.jobs import JobStatus, Jobs
from selfprivacy_api.models.services import ServiceDnsRecord, ServiceStatus
from selfprivacy_api.services.flake_service_manager import FlakeServiceManager
from selfprivacy_api.services.generic_size_counter import get_storage_usage
@@ -153,7 +156,7 @@ class TemplatedService(Service):
            self.definition_data = json.loads(source_data)
        else:
            # Check if the service exists
            if not path.exists(join(SP_MODULES_DEFENITIONS_PATH, service_id)):
            if not exists(join(SP_MODULES_DEFENITIONS_PATH, service_id)):
                raise FileNotFoundError(f"Service {service_id} not found")
            # Load the service
            with open(join(SP_MODULES_DEFENITIONS_PATH, service_id)) as file:
@@ -401,6 +404,10 @@ class TemplatedService(Service):
        else:
            return root_device

    def _get_db_dumps_folder(self) -> str:
        # Get the drive where the service is located and append the folder name
        return join("/", "volumes", self.get_drive(), f"db_dumps_{self.get_id()}")

    def get_folders(self) -> List[str]:
        folders = self.meta.folders
        owned_folders = self.meta.owned_folders
@@ -408,6 +415,8 @@ class TemplatedService(Service):
        resulting_folders = folders.copy()
        for folder in owned_folders:
            resulting_folders.append(folder.path)
        if self.get_postgresql_databases():
            resulting_folders.append(self._get_db_dumps_folder())
        return resulting_folders

    def get_owned_folders(self) -> List[OwnedPath]:
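As a concrete illustration (for a hypothetical pleroma service located on drive sda1), the dumps folder is just the volume mount point plus a service-specific name:

from os.path import join

# _get_db_dumps_folder() for service id "pleroma" on drive "sda1":
join("/", "volumes", "sda1", "db_dumps_pleroma")
# -> '/volumes/sda1/db_dumps_pleroma'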
@@ -416,6 +425,14 @@ class TemplatedService(Service):
        resulting_folders = owned_folders.copy()
        for folder in folders:
            resulting_folders.append(self.owned_path(folder))
        if self.get_postgresql_databases():
            resulting_folders.append(
                OwnedPath(
                    path=self._get_db_dumps_folder(),
                    owner="selfprivacy-api",
                    group="selfprivacy-api",
                )
            )
        return resulting_folders

    def set_location(self, volume: BlockDevice):
@@ -431,6 +448,9 @@ class TemplatedService(Service):
                user_data["modules"][service_id] = {}
            user_data["modules"][service_id]["location"] = volume.name

    def get_postgresql_databases(self) -> List[str]:
        return self.meta.postgresql_databases

    def owned_path(self, path: str):
        """Default folder ownership"""
        service_name = self.get_display_name()
@@ -454,3 +474,56 @@ class TemplatedService(Service):
            owner=owner,
            group=group,
        )

    def pre_backup(self):
        if self.get_postgresql_databases():
            job = get_backup_job(self)
            # Create the folder for the database dumps
            db_dumps_folder = self._get_db_dumps_folder()
            if not exists(db_dumps_folder):
                mkdir(db_dumps_folder)
            # Dump the databases
            for db_name in self.get_postgresql_databases():
                if job is not None:
                    Jobs.update(
                        job,
                        status_text=f"Creating a dump of database {db_name}",
                        status=JobStatus.RUNNING,
                    )
                db_dumper = PostgresDumper(db_name)
                backup_file = join(db_dumps_folder, f"{db_name}.sql.gz")
                db_dumper.backup_database(backup_file)
    def post_backup(self):
        if self.get_postgresql_databases():
            # Remove the database dumps and their folder
            # (rmdir alone would fail: the folder still contains the dumps)
            db_dumps_folder = self._get_db_dumps_folder()
            if exists(db_dumps_folder):
                for db_name in self.get_postgresql_databases():
                    dump_file = join(db_dumps_folder, f"{db_name}.sql.gz")
                    if exists(dump_file):
                        remove(dump_file)
                rmdir(db_dumps_folder)
    def pre_restore(self):
        if self.get_postgresql_databases():
            # Create the folder for the database dumps
            db_dumps_folder = self._get_db_dumps_folder()
            if not exists(db_dumps_folder):
                mkdir(db_dumps_folder)

    def post_restore(self):
        if self.get_postgresql_databases():
            job = get_backup_job(self)
            # Recover the databases
            db_dumps_folder = self._get_db_dumps_folder()
            for db_name in self.get_postgresql_databases():
                if exists(join(db_dumps_folder, f"{db_name}.sql.gz")):
                    if job is not None:
                        Jobs.update(
                            job,
                            status_text=f"Restoring database {db_name}",
                            status=JobStatus.RUNNING,
                        )
                    db_dumper = PostgresDumper(db_name)
                    backup_file = join(db_dumps_folder, f"{db_name}.sql.gz")
                    db_dumper.restore_database(backup_file)
                else:
                    logger.error(f"Database dump for {db_name} not found")
                    raise FileNotFoundError(f"Database dump for {db_name} not found")
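Taken together, a backup of a PostgreSQL-backed service now proceeds roughly as follows; snapshot_folders is a hypothetical stand-in for the actual snapshotting step:

# Sketch of the backup flow with the new hooks (not the real method).
def backup_flow_sketch(service):
    service.pre_backup()       # pg_dump each declared database into db_dumps_<id>
    snapshot_folders(service)  # snapshot all folders, dump folder included
    service.post_backup()      # remove the dumps again to save space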