Merge pull request 'delete-snapshot-batching' (#73) from delete-snapshot-batching into master

Reviewed-on: https://git.selfprivacy.org/SelfPrivacy/selfprivacy-rest-api/pulls/73
Reviewed-by: Inex Code <inex.code@selfprivacy.org>
This commit is contained in:
Inex Code 2023-11-20 14:01:53 +02:00
commit 860071e046
8 changed files with 113 additions and 49 deletions

View file

@ -395,11 +395,8 @@ class Backups:
auto_snaps = Backups._auto_snaps(service) auto_snaps = Backups._auto_snaps(service)
new_snaplist = Backups._prune_snaps_with_quotas(auto_snaps) new_snaplist = Backups._prune_snaps_with_quotas(auto_snaps)
# TODO: Can be optimized since there is forgetting of an array in one restic op deletable_snaps = [snap for snap in auto_snaps if snap not in new_snaplist]
# but most of the time this will be only one snap to forget. Backups.forget_snapshots(deletable_snaps)
for snap in auto_snaps:
if snap not in new_snaplist:
Backups.forget_snapshot(snap)
@staticmethod @staticmethod
def _standardize_quotas(i: int) -> int: def _standardize_quotas(i: int) -> int:
@ -426,7 +423,10 @@ class Backups:
yearly=Backups._standardize_quotas(quotas.yearly), # type: ignore yearly=Backups._standardize_quotas(quotas.yearly), # type: ignore
) )
) )
# do not prune all autosnaps right away, this will be done by an async task
@staticmethod
def prune_all_autosnaps() -> None:
for service in get_all_services(): for service in get_all_services():
Backups._prune_auto_snaps(service) Backups._prune_auto_snaps(service)
@ -606,6 +606,19 @@ class Backups:
return snap return snap
@staticmethod
def forget_snapshots(snapshots: List[Snapshot]) -> None:
"""
Deletes a batch of snapshots from the repo and from cache
Optimized
"""
ids = [snapshot.id for snapshot in snapshots]
Backups.provider().backupper.forget_snapshots(ids)
# less critical
for snapshot in snapshots:
Storage.delete_cached_snapshot(snapshot)
@staticmethod @staticmethod
def forget_snapshot(snapshot: Snapshot) -> None: def forget_snapshot(snapshot: Snapshot) -> None:
"""Deletes a snapshot from the repo and from cache""" """Deletes a snapshot from the repo and from cache"""
@ -614,11 +627,11 @@ class Backups:
@staticmethod @staticmethod
def forget_all_snapshots(): def forget_all_snapshots():
"""deliberately erase all snapshots we made""" """
# there is no dedicated optimized command for this, Mark all snapshots we have made for deletion and make them inaccessible
# but maybe we can have a multi-erase (this is done by cloud, we only issue a command)
for snapshot in Backups.get_all_snapshots(): """
Backups.forget_snapshot(snapshot) Backups.forget_snapshots(Backups.get_all_snapshots())
@staticmethod @staticmethod
def force_snapshot_cache_reload() -> None: def force_snapshot_cache_reload() -> None:

View file

@ -66,3 +66,8 @@ class AbstractBackupper(ABC):
def forget_snapshot(self, snapshot_id) -> None: def forget_snapshot(self, snapshot_id) -> None:
"""Forget a snapshot""" """Forget a snapshot"""
raise NotImplementedError raise NotImplementedError
@abstractmethod
def forget_snapshots(self, snapshot_ids: List[str]) -> None:
"""Maybe optimized deletion of a batch of snapshots, just cycling if unsupported"""
raise NotImplementedError

View file

@ -39,4 +39,7 @@ class NoneBackupper(AbstractBackupper):
raise NotImplementedError raise NotImplementedError
def forget_snapshot(self, snapshot_id): def forget_snapshot(self, snapshot_id):
raise NotImplementedError raise NotImplementedError("forget_snapshot")
def forget_snapshots(self, snapshots):
raise NotImplementedError("forget_snapshots")

View file

@ -86,6 +86,10 @@ class ResticBackupper(AbstractBackupper):
return f"echo {LocalBackupSecret.get()}" return f"echo {LocalBackupSecret.get()}"
def restic_command(self, *args, tags: Optional[List[str]] = None) -> List[str]: def restic_command(self, *args, tags: Optional[List[str]] = None) -> List[str]:
"""
Construct a restic command against the currently configured repo
Can support [nested] arrays as arguments, will flatten them into the final commmand
"""
if tags is None: if tags is None:
tags = [] tags = []
@ -384,15 +388,15 @@ class ResticBackupper(AbstractBackupper):
output, output,
) )
def forget_snapshot(self, snapshot_id: str) -> None:
self.forget_snapshots([snapshot_id])
@unlocked_repo @unlocked_repo
def forget_snapshot(self, snapshot_id) -> None: def forget_snapshots(self, snapshot_ids: List[str]) -> None:
""" # in case the backupper program supports batching, otherwise implement it by cycling
Either removes snapshot or marks it for deletion later,
depending on server settings
"""
forget_command = self.restic_command( forget_command = self.restic_command(
"forget", "forget",
snapshot_id, [snapshot_ids],
# TODO: prune should be done in a separate process # TODO: prune should be done in a separate process
"--prune", "--prune",
) )
@ -414,7 +418,7 @@ class ResticBackupper(AbstractBackupper):
if "no matching ID found" in err: if "no matching ID found" in err:
raise ValueError( raise ValueError(
"trying to delete, but no such snapshot: ", snapshot_id "trying to delete, but no such snapshot(s): ", snapshot_ids
) )
assert ( assert (

View file

@ -3,13 +3,18 @@ The tasks module contains the worker tasks that are used to back up and restore
""" """
from datetime import datetime, timezone from datetime import datetime, timezone
from selfprivacy_api.graphql.common_types.backup import RestoreStrategy, BackupReason from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
)
from selfprivacy_api.models.backup.snapshot import Snapshot from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.utils.huey import huey from selfprivacy_api.utils.huey import huey
from huey import crontab from huey import crontab
from selfprivacy_api.services.service import Service from selfprivacy_api.services.service import Service
from selfprivacy_api.backup import Backups from selfprivacy_api.backup import Backups
from selfprivacy_api.jobs import Jobs, JobStatus, Job
SNAPSHOT_CACHE_TTL_HOURS = 6 SNAPSHOT_CACHE_TTL_HOURS = 6
@ -36,6 +41,22 @@ def start_backup(
return True return True
@huey.task()
def prune_autobackup_snapshots(job: Job) -> bool:
"""
Remove all autobackup snapshots that do not fit into quotas set
"""
Jobs.update(job, JobStatus.RUNNING)
try:
Backups.prune_all_autosnaps()
except Exception as e:
Jobs.update(job, JobStatus.ERROR, error=type(e).__name__ + ":" + str(e))
return False
Jobs.update(job, JobStatus.FINISHED)
return True
@huey.task() @huey.task()
def restore_snapshot( def restore_snapshot(
snapshot: Snapshot, snapshot: Snapshot,

View file

@ -1,6 +1,8 @@
import typing import typing
import strawberry import strawberry
from selfprivacy_api.jobs import Jobs
from selfprivacy_api.graphql import IsAuthenticated from selfprivacy_api.graphql import IsAuthenticated
from selfprivacy_api.graphql.mutations.mutation_interface import ( from selfprivacy_api.graphql.mutations.mutation_interface import (
GenericMutationReturn, GenericMutationReturn,
@ -18,7 +20,11 @@ from selfprivacy_api.graphql.common_types.backup import (
from selfprivacy_api.backup import Backups from selfprivacy_api.backup import Backups
from selfprivacy_api.services import get_service_by_id from selfprivacy_api.services import get_service_by_id
from selfprivacy_api.backup.tasks import start_backup, restore_snapshot from selfprivacy_api.backup.tasks import (
start_backup,
restore_snapshot,
prune_autobackup_snapshots,
)
from selfprivacy_api.backup.jobs import add_backup_job, add_restore_job from selfprivacy_api.backup.jobs import add_backup_job, add_restore_job
@ -103,8 +109,16 @@ class BackupMutations:
To disable autobackup use autobackup period setting, not this mutation. To disable autobackup use autobackup period setting, not this mutation.
""" """
job = Jobs.add(
name="Trimming autobackup snapshots",
type_id="backups.autobackup_trimming",
description="Pruning the excessive snapshots after the new autobackup quotas are set",
)
try: try:
Backups.set_autobackup_quotas(quotas) Backups.set_autobackup_quotas(quotas)
# this task is async and can fail with only a job to report the error
prune_autobackup_snapshots(job)
return GenericBackupConfigReturn( return GenericBackupConfigReturn(
success=True, success=True,
message="", message="",
@ -115,7 +129,7 @@ class BackupMutations:
except Exception as e: except Exception as e:
return GenericBackupConfigReturn( return GenericBackupConfigReturn(
success=False, success=False,
message=str(e), message=type(e).__name__ + ":" + str(e),
code=400, code=400,
configuration=Backup().configuration(), configuration=Backup().configuration(),
) )

View file

@ -265,6 +265,10 @@ def api_init_without_key(
def assert_ok(data): def assert_ok(data):
if data["success"] is False:
# convenience for debugging, this should display error
# if empty, consider adding helpful messages
raise ValueError(data["code"], data["message"])
assert data["code"] == 200 assert data["code"] == 200
assert data["success"] is True assert data["success"] is True
@ -302,7 +306,7 @@ def test_snapshots_empty(authorized_client, dummy_service):
assert snaps == [] assert snaps == []
def test_start_backup(authorized_client, dummy_service): def test_start_backup(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service) response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"] data = get_data(response)["backup"]["startBackup"]
assert data["success"] is True assert data["success"] is True
@ -318,7 +322,7 @@ def test_start_backup(authorized_client, dummy_service):
assert snap["service"]["id"] == "testservice" assert snap["service"]["id"] == "testservice"
def test_restore(authorized_client, dummy_service): def test_restore(authorized_client, dummy_service, backups):
api_backup(authorized_client, dummy_service) api_backup(authorized_client, dummy_service)
snap = api_snapshots(authorized_client)[0] snap = api_snapshots(authorized_client)[0]
assert snap["id"] is not None assert snap["id"] is not None
@ -331,7 +335,7 @@ def test_restore(authorized_client, dummy_service):
assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED
def test_reinit(authorized_client, dummy_service, tmpdir): def test_reinit(authorized_client, dummy_service, tmpdir, backups):
test_repo_path = path.join(tmpdir, "not_at_all_sus") test_repo_path = path.join(tmpdir, "not_at_all_sus")
response = api_init_without_key( response = api_init_without_key(
authorized_client, "FILE", "", "", test_repo_path, "" authorized_client, "FILE", "", "", test_repo_path, ""
@ -353,7 +357,7 @@ def test_reinit(authorized_client, dummy_service, tmpdir):
assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED
def test_remove(authorized_client, generic_userdata): def test_remove(authorized_client, generic_userdata, backups):
response = api_remove(authorized_client) response = api_remove(authorized_client)
data = get_data(response)["backup"]["removeRepository"] data = get_data(response)["backup"]["removeRepository"]
assert_ok(data) assert_ok(data)
@ -367,7 +371,7 @@ def test_remove(authorized_client, generic_userdata):
assert configuration["isInitialized"] is False assert configuration["isInitialized"] is False
def test_autobackup_quotas_nonzero(authorized_client): def test_autobackup_quotas_nonzero(authorized_client, backups):
quotas = _AutobackupQuotas( quotas = _AutobackupQuotas(
last=3, last=3,
daily=2, daily=2,
@ -383,7 +387,7 @@ def test_autobackup_quotas_nonzero(authorized_client):
assert configuration["autobackupQuotas"] == quotas assert configuration["autobackupQuotas"] == quotas
def test_autobackup_period_nonzero(authorized_client): def test_autobackup_period_nonzero(authorized_client, backups):
new_period = 11 new_period = 11
response = api_set_period(authorized_client, new_period) response = api_set_period(authorized_client, new_period)
data = get_data(response)["backup"]["setAutobackupPeriod"] data = get_data(response)["backup"]["setAutobackupPeriod"]
@ -393,7 +397,7 @@ def test_autobackup_period_nonzero(authorized_client):
assert configuration["autobackupPeriod"] == new_period assert configuration["autobackupPeriod"] == new_period
def test_autobackup_period_zero(authorized_client): def test_autobackup_period_zero(authorized_client, backups):
new_period = 0 new_period = 0
# since it is none by default, we better first set it to something non-negative # since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11) response = api_set_period(authorized_client, 11)
@ -406,7 +410,7 @@ def test_autobackup_period_zero(authorized_client):
assert configuration["autobackupPeriod"] == None assert configuration["autobackupPeriod"] == None
def test_autobackup_period_none(authorized_client): def test_autobackup_period_none(authorized_client, backups):
# since it is none by default, we better first set it to something non-negative # since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11) response = api_set_period(authorized_client, 11)
# and now we nullify it # and now we nullify it
@ -418,7 +422,7 @@ def test_autobackup_period_none(authorized_client):
assert configuration["autobackupPeriod"] == None assert configuration["autobackupPeriod"] == None
def test_autobackup_period_negative(authorized_client): def test_autobackup_period_negative(authorized_client, backups):
# since it is none by default, we better first set it to something non-negative # since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11) response = api_set_period(authorized_client, 11)
# and now we nullify it # and now we nullify it
@ -432,7 +436,7 @@ def test_autobackup_period_negative(authorized_client):
# We cannot really check the effect at this level, we leave it to backend tests # We cannot really check the effect at this level, we leave it to backend tests
# But we still make it run in both empty and full scenarios and ask for snaps afterwards # But we still make it run in both empty and full scenarios and ask for snaps afterwards
def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service): def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service, backups):
api_remove(authorized_client) api_remove(authorized_client)
response = api_reload_snapshots(authorized_client) response = api_reload_snapshots(authorized_client)
@ -443,7 +447,7 @@ def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service):
assert snaps == [] assert snaps == []
def test_reload_snapshots(authorized_client, dummy_service): def test_reload_snapshots(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service) response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"] data = get_data(response)["backup"]["startBackup"]
@ -455,7 +459,7 @@ def test_reload_snapshots(authorized_client, dummy_service):
assert len(snaps) == 1 assert len(snaps) == 1
def test_forget_snapshot(authorized_client, dummy_service): def test_forget_snapshot(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service) response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"] data = get_data(response)["backup"]["startBackup"]
@ -470,7 +474,7 @@ def test_forget_snapshot(authorized_client, dummy_service):
assert len(snaps) == 0 assert len(snaps) == 0
def test_forget_nonexistent_snapshot(authorized_client, dummy_service): def test_forget_nonexistent_snapshot(authorized_client, dummy_service, backups):
snaps = api_snapshots(authorized_client) snaps = api_snapshots(authorized_client)
assert len(snaps) == 0 assert len(snaps) == 0
response = api_forget(authorized_client, "898798uekiodpjoiweoiwuoeirueor") response = api_forget(authorized_client, "898798uekiodpjoiweoiwuoeirueor")

View file

@ -1,52 +1,49 @@
import pytest import pytest
import os import os
import os.path as path import os.path as path
from os import makedirs from os import makedirs
from os import remove from os import remove
from os import listdir from os import listdir
from os import urandom from os import urandom
from datetime import datetime, timedelta, timezone, date, time
from subprocess import Popen from datetime import datetime, timedelta, timezone
from copy import copy from copy import copy
import secrets
import tempfile import tempfile
from selfprivacy_api.utils.huey import huey from selfprivacy_api.utils.huey import huey
import selfprivacy_api.services as services import selfprivacy_api.services as services
from selfprivacy_api.services import Service, get_all_services from selfprivacy_api.services import Service, get_all_services
from selfprivacy_api.services.service import ServiceStatus
from selfprivacy_api.services import get_service_by_id from selfprivacy_api.services import get_service_by_id
from selfprivacy_api.services.service import ServiceStatus
from selfprivacy_api.services.test_service import DummyService from selfprivacy_api.services.test_service import DummyService
from selfprivacy_api.graphql.queries.providers import BackupProvider from selfprivacy_api.graphql.queries.providers import BackupProvider
from selfprivacy_api.graphql.common_types.backup import RestoreStrategy, BackupReason from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
AutobackupQuotas,
)
from selfprivacy_api.jobs import Jobs, JobStatus from selfprivacy_api.jobs import Jobs, JobStatus
from selfprivacy_api.models.backup.snapshot import Snapshot from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.graphql.common_types.backup import AutobackupQuotas
from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS
import selfprivacy_api.backup.providers as providers import selfprivacy_api.backup.providers as providers
from selfprivacy_api.backup.providers import AbstractBackupProvider from selfprivacy_api.backup.providers import AbstractBackupProvider
from selfprivacy_api.backup.providers.backblaze import Backblaze from selfprivacy_api.backup.providers.backblaze import Backblaze
from selfprivacy_api.backup.providers.none import NoBackups from selfprivacy_api.backup.providers.none import NoBackups
from selfprivacy_api.backup.util import sync from selfprivacy_api.backup.util import sync
from selfprivacy_api.backup.backuppers.restic_backupper import ResticBackupper
from selfprivacy_api.backup.jobs import add_backup_job, add_restore_job
from selfprivacy_api.backup.tasks import ( from selfprivacy_api.backup.tasks import (
start_backup, start_backup,
restore_snapshot, restore_snapshot,
reload_snapshot_cache, reload_snapshot_cache,
prune_autobackup_snapshots,
) )
from selfprivacy_api.backup.storage import Storage from selfprivacy_api.backup.storage import Storage
from selfprivacy_api.backup.jobs import get_backup_job
TESTFILE_BODY = "testytest!" TESTFILE_BODY = "testytest!"
@ -651,6 +648,9 @@ def test_too_many_auto(backups, dummy_service):
# Retroactivity # Retroactivity
quota.last = 1 quota.last = 1
Backups.set_autobackup_quotas(quota) Backups.set_autobackup_quotas(quota)
job = Jobs.add("trimming", "test.autobackup_trimming", "trimming the snaps!")
handle = prune_autobackup_snapshots(job)
handle(blocking=True)
snaps = Backups.get_snapshots(dummy_service) snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1 assert len(snaps) == 1