Merge pull request 'delete-snapshot-batching' (#73) from delete-snapshot-batching into master

Reviewed-on: https://git.selfprivacy.org/SelfPrivacy/selfprivacy-rest-api/pulls/73
Reviewed-by: Inex Code <inex.code@selfprivacy.org>
This commit is contained in:
Inex Code 2023-11-20 14:01:53 +02:00
commit 860071e046
8 changed files with 113 additions and 49 deletions

View file

@ -395,11 +395,8 @@ class Backups:
auto_snaps = Backups._auto_snaps(service)
new_snaplist = Backups._prune_snaps_with_quotas(auto_snaps)
# TODO: Can be optimized since there is forgetting of an array in one restic op
# but most of the time this will be only one snap to forget.
for snap in auto_snaps:
if snap not in new_snaplist:
Backups.forget_snapshot(snap)
deletable_snaps = [snap for snap in auto_snaps if snap not in new_snaplist]
Backups.forget_snapshots(deletable_snaps)
@staticmethod
def _standardize_quotas(i: int) -> int:
@ -426,7 +423,10 @@ class Backups:
yearly=Backups._standardize_quotas(quotas.yearly), # type: ignore
)
)
# do not prune all autosnaps right away, this will be done by an async task
@staticmethod
def prune_all_autosnaps() -> None:
for service in get_all_services():
Backups._prune_auto_snaps(service)
@ -606,6 +606,19 @@ class Backups:
return snap
@staticmethod
def forget_snapshots(snapshots: List[Snapshot]) -> None:
"""
Deletes a batch of snapshots from the repo and from cache
Optimized
"""
ids = [snapshot.id for snapshot in snapshots]
Backups.provider().backupper.forget_snapshots(ids)
# less critical
for snapshot in snapshots:
Storage.delete_cached_snapshot(snapshot)
@staticmethod
def forget_snapshot(snapshot: Snapshot) -> None:
"""Deletes a snapshot from the repo and from cache"""
@ -614,11 +627,11 @@ class Backups:
@staticmethod
def forget_all_snapshots():
"""deliberately erase all snapshots we made"""
# there is no dedicated optimized command for this,
# but maybe we can have a multi-erase
for snapshot in Backups.get_all_snapshots():
Backups.forget_snapshot(snapshot)
"""
Mark all snapshots we have made for deletion and make them inaccessible
(this is done by cloud, we only issue a command)
"""
Backups.forget_snapshots(Backups.get_all_snapshots())
@staticmethod
def force_snapshot_cache_reload() -> None:

View file

@ -66,3 +66,8 @@ class AbstractBackupper(ABC):
def forget_snapshot(self, snapshot_id) -> None:
"""Forget a snapshot"""
raise NotImplementedError
@abstractmethod
def forget_snapshots(self, snapshot_ids: List[str]) -> None:
"""Maybe optimized deletion of a batch of snapshots, just cycling if unsupported"""
raise NotImplementedError

View file

@ -39,4 +39,7 @@ class NoneBackupper(AbstractBackupper):
raise NotImplementedError
def forget_snapshot(self, snapshot_id):
raise NotImplementedError
raise NotImplementedError("forget_snapshot")
def forget_snapshots(self, snapshots):
raise NotImplementedError("forget_snapshots")

View file

@ -86,6 +86,10 @@ class ResticBackupper(AbstractBackupper):
return f"echo {LocalBackupSecret.get()}"
def restic_command(self, *args, tags: Optional[List[str]] = None) -> List[str]:
"""
Construct a restic command against the currently configured repo
Can support [nested] arrays as arguments, will flatten them into the final commmand
"""
if tags is None:
tags = []
@ -384,15 +388,15 @@ class ResticBackupper(AbstractBackupper):
output,
)
def forget_snapshot(self, snapshot_id: str) -> None:
self.forget_snapshots([snapshot_id])
@unlocked_repo
def forget_snapshot(self, snapshot_id) -> None:
"""
Either removes snapshot or marks it for deletion later,
depending on server settings
"""
def forget_snapshots(self, snapshot_ids: List[str]) -> None:
# in case the backupper program supports batching, otherwise implement it by cycling
forget_command = self.restic_command(
"forget",
snapshot_id,
[snapshot_ids],
# TODO: prune should be done in a separate process
"--prune",
)
@ -414,7 +418,7 @@ class ResticBackupper(AbstractBackupper):
if "no matching ID found" in err:
raise ValueError(
"trying to delete, but no such snapshot: ", snapshot_id
"trying to delete, but no such snapshot(s): ", snapshot_ids
)
assert (

View file

@ -3,13 +3,18 @@ The tasks module contains the worker tasks that are used to back up and restore
"""
from datetime import datetime, timezone
from selfprivacy_api.graphql.common_types.backup import RestoreStrategy, BackupReason
from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
)
from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.utils.huey import huey
from huey import crontab
from selfprivacy_api.services.service import Service
from selfprivacy_api.backup import Backups
from selfprivacy_api.jobs import Jobs, JobStatus, Job
SNAPSHOT_CACHE_TTL_HOURS = 6
@ -36,6 +41,22 @@ def start_backup(
return True
@huey.task()
def prune_autobackup_snapshots(job: Job) -> bool:
"""
Remove all autobackup snapshots that do not fit into quotas set
"""
Jobs.update(job, JobStatus.RUNNING)
try:
Backups.prune_all_autosnaps()
except Exception as e:
Jobs.update(job, JobStatus.ERROR, error=type(e).__name__ + ":" + str(e))
return False
Jobs.update(job, JobStatus.FINISHED)
return True
@huey.task()
def restore_snapshot(
snapshot: Snapshot,

View file

@ -1,6 +1,8 @@
import typing
import strawberry
from selfprivacy_api.jobs import Jobs
from selfprivacy_api.graphql import IsAuthenticated
from selfprivacy_api.graphql.mutations.mutation_interface import (
GenericMutationReturn,
@ -18,7 +20,11 @@ from selfprivacy_api.graphql.common_types.backup import (
from selfprivacy_api.backup import Backups
from selfprivacy_api.services import get_service_by_id
from selfprivacy_api.backup.tasks import start_backup, restore_snapshot
from selfprivacy_api.backup.tasks import (
start_backup,
restore_snapshot,
prune_autobackup_snapshots,
)
from selfprivacy_api.backup.jobs import add_backup_job, add_restore_job
@ -103,8 +109,16 @@ class BackupMutations:
To disable autobackup use autobackup period setting, not this mutation.
"""
job = Jobs.add(
name="Trimming autobackup snapshots",
type_id="backups.autobackup_trimming",
description="Pruning the excessive snapshots after the new autobackup quotas are set",
)
try:
Backups.set_autobackup_quotas(quotas)
# this task is async and can fail with only a job to report the error
prune_autobackup_snapshots(job)
return GenericBackupConfigReturn(
success=True,
message="",
@ -115,7 +129,7 @@ class BackupMutations:
except Exception as e:
return GenericBackupConfigReturn(
success=False,
message=str(e),
message=type(e).__name__ + ":" + str(e),
code=400,
configuration=Backup().configuration(),
)

View file

@ -265,6 +265,10 @@ def api_init_without_key(
def assert_ok(data):
if data["success"] is False:
# convenience for debugging, this should display error
# if empty, consider adding helpful messages
raise ValueError(data["code"], data["message"])
assert data["code"] == 200
assert data["success"] is True
@ -302,7 +306,7 @@ def test_snapshots_empty(authorized_client, dummy_service):
assert snaps == []
def test_start_backup(authorized_client, dummy_service):
def test_start_backup(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"]
assert data["success"] is True
@ -318,7 +322,7 @@ def test_start_backup(authorized_client, dummy_service):
assert snap["service"]["id"] == "testservice"
def test_restore(authorized_client, dummy_service):
def test_restore(authorized_client, dummy_service, backups):
api_backup(authorized_client, dummy_service)
snap = api_snapshots(authorized_client)[0]
assert snap["id"] is not None
@ -331,7 +335,7 @@ def test_restore(authorized_client, dummy_service):
assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED
def test_reinit(authorized_client, dummy_service, tmpdir):
def test_reinit(authorized_client, dummy_service, tmpdir, backups):
test_repo_path = path.join(tmpdir, "not_at_all_sus")
response = api_init_without_key(
authorized_client, "FILE", "", "", test_repo_path, ""
@ -353,7 +357,7 @@ def test_reinit(authorized_client, dummy_service, tmpdir):
assert Jobs.get_job(job["uid"]).status == JobStatus.FINISHED
def test_remove(authorized_client, generic_userdata):
def test_remove(authorized_client, generic_userdata, backups):
response = api_remove(authorized_client)
data = get_data(response)["backup"]["removeRepository"]
assert_ok(data)
@ -367,7 +371,7 @@ def test_remove(authorized_client, generic_userdata):
assert configuration["isInitialized"] is False
def test_autobackup_quotas_nonzero(authorized_client):
def test_autobackup_quotas_nonzero(authorized_client, backups):
quotas = _AutobackupQuotas(
last=3,
daily=2,
@ -383,7 +387,7 @@ def test_autobackup_quotas_nonzero(authorized_client):
assert configuration["autobackupQuotas"] == quotas
def test_autobackup_period_nonzero(authorized_client):
def test_autobackup_period_nonzero(authorized_client, backups):
new_period = 11
response = api_set_period(authorized_client, new_period)
data = get_data(response)["backup"]["setAutobackupPeriod"]
@ -393,7 +397,7 @@ def test_autobackup_period_nonzero(authorized_client):
assert configuration["autobackupPeriod"] == new_period
def test_autobackup_period_zero(authorized_client):
def test_autobackup_period_zero(authorized_client, backups):
new_period = 0
# since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11)
@ -406,7 +410,7 @@ def test_autobackup_period_zero(authorized_client):
assert configuration["autobackupPeriod"] == None
def test_autobackup_period_none(authorized_client):
def test_autobackup_period_none(authorized_client, backups):
# since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11)
# and now we nullify it
@ -418,7 +422,7 @@ def test_autobackup_period_none(authorized_client):
assert configuration["autobackupPeriod"] == None
def test_autobackup_period_negative(authorized_client):
def test_autobackup_period_negative(authorized_client, backups):
# since it is none by default, we better first set it to something non-negative
response = api_set_period(authorized_client, 11)
# and now we nullify it
@ -432,7 +436,7 @@ def test_autobackup_period_negative(authorized_client):
# We cannot really check the effect at this level, we leave it to backend tests
# But we still make it run in both empty and full scenarios and ask for snaps afterwards
def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service):
def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service, backups):
api_remove(authorized_client)
response = api_reload_snapshots(authorized_client)
@ -443,7 +447,7 @@ def test_reload_snapshots_bare_bare_bare(authorized_client, dummy_service):
assert snaps == []
def test_reload_snapshots(authorized_client, dummy_service):
def test_reload_snapshots(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"]
@ -455,7 +459,7 @@ def test_reload_snapshots(authorized_client, dummy_service):
assert len(snaps) == 1
def test_forget_snapshot(authorized_client, dummy_service):
def test_forget_snapshot(authorized_client, dummy_service, backups):
response = api_backup(authorized_client, dummy_service)
data = get_data(response)["backup"]["startBackup"]
@ -470,7 +474,7 @@ def test_forget_snapshot(authorized_client, dummy_service):
assert len(snaps) == 0
def test_forget_nonexistent_snapshot(authorized_client, dummy_service):
def test_forget_nonexistent_snapshot(authorized_client, dummy_service, backups):
snaps = api_snapshots(authorized_client)
assert len(snaps) == 0
response = api_forget(authorized_client, "898798uekiodpjoiweoiwuoeirueor")

View file

@ -1,52 +1,49 @@
import pytest
import os
import os.path as path
from os import makedirs
from os import remove
from os import listdir
from os import urandom
from datetime import datetime, timedelta, timezone, date, time
from subprocess import Popen
from datetime import datetime, timedelta, timezone
from copy import copy
import secrets
import tempfile
from selfprivacy_api.utils.huey import huey
import selfprivacy_api.services as services
from selfprivacy_api.services import Service, get_all_services
from selfprivacy_api.services.service import ServiceStatus
from selfprivacy_api.services import get_service_by_id
from selfprivacy_api.services.service import ServiceStatus
from selfprivacy_api.services.test_service import DummyService
from selfprivacy_api.graphql.queries.providers import BackupProvider
from selfprivacy_api.graphql.common_types.backup import RestoreStrategy, BackupReason
from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
AutobackupQuotas,
)
from selfprivacy_api.jobs import Jobs, JobStatus
from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.graphql.common_types.backup import AutobackupQuotas
from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS
import selfprivacy_api.backup.providers as providers
from selfprivacy_api.backup.providers import AbstractBackupProvider
from selfprivacy_api.backup.providers.backblaze import Backblaze
from selfprivacy_api.backup.providers.none import NoBackups
from selfprivacy_api.backup.util import sync
from selfprivacy_api.backup.backuppers.restic_backupper import ResticBackupper
from selfprivacy_api.backup.jobs import add_backup_job, add_restore_job
from selfprivacy_api.backup.tasks import (
start_backup,
restore_snapshot,
reload_snapshot_cache,
prune_autobackup_snapshots,
)
from selfprivacy_api.backup.storage import Storage
from selfprivacy_api.backup.jobs import get_backup_job
TESTFILE_BODY = "testytest!"
@ -651,6 +648,9 @@ def test_too_many_auto(backups, dummy_service):
# Retroactivity
quota.last = 1
Backups.set_autobackup_quotas(quota)
job = Jobs.add("trimming", "test.autobackup_trimming", "trimming the snaps!")
handle = prune_autobackup_snapshots(job)
handle(blocking=True)
snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1