refactor(backups): use restic-like rotation policy

This commit is contained in:
Inex Code 2023-09-09 03:26:41 +03:00
parent 56be3d9c31
commit dedd6a9cc9
7 changed files with 278 additions and 163 deletions

View file

@ -4,7 +4,7 @@ This module contains the controller class for backups.
from datetime import datetime, timedelta
import os
from os import statvfs
from typing import List, Optional
from typing import Callable, List, Optional
from selfprivacy_api.utils import ReadUserData, WriteUserData
@ -28,13 +28,7 @@ from selfprivacy_api.graphql.common_types.backup import (
BackupReason,
AutobackupQuotas,
)
from selfprivacy_api.backup.time import (
same_day,
same_month,
same_week,
same_year,
same_lifetime_of_the_universe,
)
from selfprivacy_api.models.backup.snapshot import Snapshot
@ -81,6 +75,24 @@ class NotDeadError(AssertionError):
"""
class RotationBucket:
    """
    Mutable state of one retention rule used during snapshot rotation.

    Fields:
    - counter: int, the number of snapshots the bucket may still keep
    - last: int, the most recent value produced by the rotation lambda
    - rotation_lambda: callable taking a datetime and an int and
      returning an int
    """

    def __init__(self, counter: int, last: int, rotation_lambda):
        self.rotation_lambda: Callable[[datetime, int], int] = rotation_lambda
        self.last: int = last
        self.counter: int = counter

    def __str__(self) -> str:
        # Only the numeric state is shown; the lambda has no useful text form.
        return f"Bucket(counter={self.counter}, last={self.last})"
class Backups:
"""A stateless controller class for backups"""
@ -314,45 +326,54 @@ class Backups:
if snap.reason == BackupReason.AUTO
]
@staticmethod
def add_snap_but_with_quotas(
    new_snap: Snapshot, snaps: List[Snapshot], quotas: AutobackupQuotas
) -> None:
    """
    Append `new_snap` to `snaps` in place and enforce per-period quotas.

    For each period (day, week, month, year, everything) with a positive
    quota, the snapshots sharing that period with `new_snap` are counted
    and the oldest ones beyond the quota are removed from `snaps`.
    Quotas that are zero or negative are not enforced.
    """
    period_limits = (
        (same_day, quotas.daily),
        (same_week, quotas.weekly),
        (same_month, quotas.monthly),
        (same_year, quotas.yearly),
        (same_lifetime_of_the_universe, quotas.total),
    )

    snaps.append(new_snap)

    for in_same_period, limit in period_limits:
        if limit <= 0:
            continue

        # Oldest first, so the head of the list is what gets dropped.
        peers = sorted(
            (
                candidate
                for candidate in snaps
                if in_same_period(candidate.created_at, new_snap.created_at)
            ),
            key=lambda s: s.created_at,
        )
        excess = len(peers) - limit
        if excess > 0:
            for doomed in peers[:excess]:
                snaps.remove(doomed)
@staticmethod
def _prune_snaps_with_quotas(snapshots: List[Snapshot]) -> List[Snapshot]:
    """
    Pick the snapshots that survive the restic-like rotation policy.

    Snapshots are visited newest first. Every quota (last, daily, weekly,
    monthly, yearly) is a bucket. A bucket keeps a snapshot when it still
    has capacity and the snapshot's period key differs from the key of the
    previous snapshot that bucket kept. The oldest snapshot is also kept
    by every bucket that still has capacity.

    A quota of 0 keeps nothing, -1 means unlimited.

    The argument is not mutated. The returned list is ordered newest first.
    """
    # Function broken out for testability

    def iso_week_key(date: datetime, _: int) -> int:
        # The ISO week number must be paired with the ISO year, not the
        # calendar year: 2022-12-31 and 2023-01-01 are both in ISO week
        # 2022-W52 and must share one weekly slot.
        iso_year, iso_week = date.isocalendar()[:2]
        return iso_year * 100 + iso_week

    # Sorting newest first
    sorted_snaps = sorted(snapshots, key=lambda s: s.created_at, reverse=True)
    quotas: AutobackupQuotas = Backups.autobackup_quotas()

    buckets: list[RotationBucket] = [
        RotationBucket(
            quotas.last,
            -1,
            # Every snapshot has its own index, so each one is a new "period".
            lambda _, index: index,
        ),
        RotationBucket(
            quotas.daily,
            -1,
            lambda date, _: date.year * 10000 + date.month * 100 + date.day,
        ),
        RotationBucket(
            quotas.weekly,
            -1,
            iso_week_key,
        ),
        RotationBucket(
            quotas.monthly,
            -1,
            lambda date, _: date.year * 100 + date.month,
        ),
        RotationBucket(
            quotas.yearly,
            -1,
            lambda date, _: date.year,
        ),
    ]

    oldest_index = len(sorted_snaps) - 1
    new_snaplist: List[Snapshot] = []
    for i, snap in enumerate(sorted_snaps):
        keep_snap = False
        for bucket in buckets:
            if (bucket.counter > 0) or (bucket.counter == -1):
                val = bucket.rotation_lambda(snap.created_at, i)
                # A bucket with vacant capacity also keeps the oldest
                # snapshot, even when it shares a period with the previous one.
                if (val != bucket.last) or (i == oldest_index):
                    bucket.last = val
                    if bucket.counter > 0:
                        bucket.counter -= 1
                    if not keep_snap:
                        new_snaplist.append(snap)
                    keep_snap = True

    return new_snaplist
@ -372,27 +393,27 @@ class Backups:
@staticmethod
def _standardize_quotas(i: int) -> int:
if i <= 0:
if i <= -1:
i = -1
return i
@staticmethod
def autobackup_quotas() -> AutobackupQuotas:
    """
    Return the autobackup quotas as reported by Storage.

    0 means do not keep, -1 means unlimited
    """
    stored_quotas = Storage.autobackup_quotas()
    return stored_quotas
@staticmethod
def set_autobackup_quotas(quotas: AutobackupQuotas) -> None:
    """
    Standardize every quota and hand the result to Storage.

    0 means do not keep, -1 means unlimited
    """
    standardize = Backups._standardize_quotas
    Storage.set_autobackup_quotas(
        AutobackupQuotas(
            last=standardize(quotas.last),
            daily=standardize(quotas.daily),
            weekly=standardize(quotas.weekly),
            monthly=standardize(quotas.monthly),
            yearly=standardize(quotas.yearly),
        )
    )

View file

@ -5,7 +5,7 @@ import json
import datetime
import tempfile
from typing import List, TypeVar, Callable
from typing import List, Optional, TypeVar, Callable
from collections.abc import Iterable
from json.decoder import JSONDecodeError
from os.path import exists, join
@ -33,12 +33,12 @@ def unlocked_repo(func: T) -> T:
def inner(self: ResticBackupper, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except Exception as e:
if "unable to create lock" in str(e):
except Exception as error:
if "unable to create lock" in str(error):
self.unlock()
return func(self, *args, **kwargs)
else:
raise e
raise error
# Above, we manually guarantee that the type returned is compatible.
return inner # type: ignore
@ -85,7 +85,10 @@ class ResticBackupper(AbstractBackupper):
def _password_command(self):
    """Build an `echo` command string that prints the local backup secret."""
    # NOTE(review): the secret is interpolated without shell quoting;
    # confirm it can never contain shell metacharacters or whitespace.
    return f"echo {LocalBackupSecret.get()}"
def restic_command(self, *args, tags: List[str] = []) -> List[str]:
def restic_command(self, *args, tags: Optional[List[str]] = None) -> List[str]:
if tags is None:
tags = []
command = [
"restic",
"-o",
@ -219,7 +222,7 @@ class ResticBackupper(AbstractBackupper):
) from error
@staticmethod
def _snapshot_id_from_backup_messages(messages) -> Snapshot:
def _snapshot_id_from_backup_messages(messages) -> str:
for message in messages:
if message["message_type"] == "summary":
# There is a discrepancy between versions of restic/rclone
@ -317,8 +320,8 @@ class ResticBackupper(AbstractBackupper):
break
if "unable" in line:
raise ValueError(line)
except Exception as e:
raise ValueError("could not lock repository") from e
except Exception as error:
raise ValueError("could not lock repository") from error
@unlocked_repo
def restored_size(self, snapshot_id: str) -> int:
@ -415,6 +418,8 @@ class ResticBackupper(AbstractBackupper):
forget_command = self.restic_command(
"forget",
snapshot_id,
# TODO: prune should be done in a separate process
"--prune",
)
with subprocess.Popen(

View file

@ -193,11 +193,11 @@ class Storage:
)
if quotas_model is None:
unlimited_quotas = AutobackupQuotas(
last=-1,
daily=-1,
weekly=-1,
monthly=-1,
yearly=-1,
total=-1,
)
return unlimited_quotas
return AutobackupQuotas.from_pydantic(quotas_model)
return AutobackupQuotas.from_pydantic(quotas_model) # pylint: disable=no-member

View file

@ -1,29 +0,0 @@
from datetime import datetime, timedelta, time
def same_day(a: datetime, b: datetime) -> bool:
    """Return True when `a` and `b` fall on the same calendar date."""
    return (a.year, a.month, a.day) == (b.year, b.month, b.day)
def same_week(a: datetime, b: datetime) -> bool:
    """
    Return True when `b` lies in the Monday-based week that contains `a`.

    The week is the half-open interval from Monday 00:00 of `a`'s week up
    to, but not including, the following Monday 00:00.
    """
    # doing the hard way because weeks traverse the edges of years
    zerobased_weekday = a.isoweekday() - 1
    start_of_day = datetime.combine(a.date(), time.min)
    start_of_week = start_of_day - timedelta(days=zerobased_weekday)
    end_of_week = start_of_week + timedelta(days=7)
    # Exclusive upper bound: midnight of the next Monday belongs to the
    # next week, otherwise that instant would be in two weeks at once.
    return start_of_week <= b < end_of_week
def same_month(a: datetime, b: datetime) -> bool:
    """Return True when `a` and `b` share both the year and the month."""
    return (a.year, a.month) == (b.year, b.month)
def same_year(a: datetime, b: datetime) -> bool:
    """Return True when `a` and `b` belong to the same calendar year."""
    year_a, year_b = a.year, b.year
    return year_a == year_b
def same_lifetime_of_the_universe(a: datetime, b: datetime) -> bool:
    """
    Return True for any pair of datetimes.

    Same signature as the other `same_*` predicates; neither argument
    is inspected.
    """
    return True

View file

@ -1,7 +1,7 @@
"""Backup"""
# pylint: disable=too-few-public-methods
import strawberry
from enum import Enum
import strawberry
from pydantic import BaseModel
@ -19,11 +19,11 @@ class BackupReason(Enum):
class _AutobackupQuotas(BaseModel):
    """
    Autobackup retention quotas, one integer per rotation rule.

    For every field 0 means do not keep, -1 means unlimited.
    """

    last: int
    daily: int
    weekly: int
    monthly: int
    yearly: int
@strawberry.experimental.pydantic.type(model=_AutobackupQuotas, all_fields=True)

View file

@ -58,11 +58,11 @@ mutation TestAutobackupQuotas($input: AutobackupQuotasInput!) {
locationName
locationId
autobackupQuotas {
last
daily
weekly
monthly
yearly
total
}
}
}
@ -368,11 +368,11 @@ def test_remove(authorized_client, generic_userdata):
def test_autobackup_quotas_nonzero(authorized_client):
quotas = _AutobackupQuotas(
last=3,
daily=2,
weekly=4,
monthly=13,
yearly=14,
total=3,
)
response = api_set_quotas(authorized_client, quotas)
data = get_data(response)["backup"]["setAutobackupQuotas"]

View file

@ -305,11 +305,19 @@ def test_backup_reasons(backups, dummy_service):
# Every rule unlimited (-1).
unlimited_quotas = AutobackupQuotas(
    last=-1,
    daily=-1,
    weekly=-1,
    monthly=-1,
    yearly=-1,
)

# Every rule disabled (0); tests copy this and enable single rules.
zero_quotas = AutobackupQuotas(
    last=0,
    daily=0,
    weekly=0,
    monthly=0,
    yearly=0,
)
@ -321,20 +329,66 @@ def test_get_empty_quotas(backups):
def test_set_quotas(backups):
    """Stored quotas keep 0 and positive values; values below -1 become -1."""
    requested = AutobackupQuotas(
        last=3,
        daily=2343,
        weekly=343,
        monthly=0,
        yearly=-34556,
    )
    Backups.set_autobackup_quotas(requested)

    expected = AutobackupQuotas(
        last=3,
        daily=2343,
        weekly=343,
        monthly=0,
        yearly=-1,
    )
    assert Backups.autobackup_quotas() == expected
def test_set_zero_quotas(backups):
    """All-zero quotas are stored and read back unchanged."""
    all_zero = AutobackupQuotas(last=0, daily=0, weekly=0, monthly=0, yearly=0)
    Backups.set_autobackup_quotas(all_zero)
    assert Backups.autobackup_quotas() == zero_quotas
def test_set_unlimited_quotas(backups):
    """All -1 quotas are stored and read back unchanged."""
    all_unlimited = AutobackupQuotas(
        last=-1,
        daily=-1,
        weekly=-1,
        monthly=-1,
        yearly=-1,
    )
    Backups.set_autobackup_quotas(all_unlimited)
    assert Backups.autobackup_quotas() == unlimited_quotas
def test_set_zero_quotas_after_unlimited(backups):
    """Zero quotas replace previously stored unlimited quotas."""
    first = AutobackupQuotas(last=-1, daily=-1, weekly=-1, monthly=-1, yearly=-1)
    Backups.set_autobackup_quotas(first)
    assert Backups.autobackup_quotas() == unlimited_quotas

    second = AutobackupQuotas(last=0, daily=0, weekly=0, monthly=0, yearly=0)
    Backups.set_autobackup_quotas(second)
    assert Backups.autobackup_quotas() == zero_quotas
def dummy_snapshot(date: datetime):
@ -351,15 +405,24 @@ def test_autobackup_snapshots_pruning(backups):
now = datetime(year=2023, month=1, day=25, hour=10)
snaps = [
dummy_snapshot(now - timedelta(days=365 * 2)),
dummy_snapshot(now - timedelta(days=20)),
dummy_snapshot(now - timedelta(days=2)),
dummy_snapshot(now - timedelta(days=1, hours=3)),
dummy_snapshot(now - timedelta(days=1, hours=2)),
dummy_snapshot(now - timedelta(days=1)),
dummy_snapshot(now - timedelta(hours=2)),
dummy_snapshot(now - timedelta(minutes=5)),
dummy_snapshot(now),
dummy_snapshot(now - timedelta(minutes=5)),
dummy_snapshot(now - timedelta(hours=2)),
dummy_snapshot(now - timedelta(hours=5)),
dummy_snapshot(now - timedelta(days=1)),
dummy_snapshot(now - timedelta(days=1, hours=2)),
dummy_snapshot(now - timedelta(days=1, hours=3)),
dummy_snapshot(now - timedelta(days=2)),
dummy_snapshot(now - timedelta(days=7)),
dummy_snapshot(now - timedelta(days=12)),
dummy_snapshot(now - timedelta(days=23)),
dummy_snapshot(now - timedelta(days=28)),
dummy_snapshot(now - timedelta(days=32)),
dummy_snapshot(now - timedelta(days=47)),
dummy_snapshot(now - timedelta(days=64)),
dummy_snapshot(now - timedelta(days=84)),
dummy_snapshot(now - timedelta(days=104)),
dummy_snapshot(now - timedelta(days=365 * 2)),
]
old_len = len(snaps)
@ -367,135 +430,190 @@ def test_autobackup_snapshots_pruning(backups):
Backups.set_autobackup_quotas(quotas)
assert Backups._prune_snaps_with_quotas(snaps) == snaps
quotas = copy(unlimited_quotas)
quotas = copy(zero_quotas)
quotas.last = 2
quotas.daily = 2
Backups.set_autobackup_quotas(quotas)
pruned_snaps = Backups._prune_snaps_with_quotas(snaps)
assert pruned_snaps == [
dummy_snapshot(now - timedelta(days=365 * 2)),
dummy_snapshot(now - timedelta(days=20)),
dummy_snapshot(now - timedelta(days=2)),
dummy_snapshot(now - timedelta(days=1, hours=2)),
dummy_snapshot(now - timedelta(days=1)),
dummy_snapshot(now - timedelta(minutes=5)),
snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
assert snaps_to_keep == [
dummy_snapshot(now),
dummy_snapshot(now - timedelta(minutes=5)),
# dummy_snapshot(now - timedelta(hours=2)),
# dummy_snapshot(now - timedelta(hours=5)),
dummy_snapshot(now - timedelta(days=1)),
# dummy_snapshot(now - timedelta(days=1, hours=2)),
# dummy_snapshot(now - timedelta(days=1, hours=3)),
# dummy_snapshot(now - timedelta(days=2)),
# dummy_snapshot(now - timedelta(days=7)),
# dummy_snapshot(now - timedelta(days=12)),
# dummy_snapshot(now - timedelta(days=23)),
# dummy_snapshot(now - timedelta(days=28)),
# dummy_snapshot(now - timedelta(days=32)),
# dummy_snapshot(now - timedelta(days=47)),
# dummy_snapshot(now - timedelta(days=64)),
# dummy_snapshot(now - timedelta(days=84)),
# dummy_snapshot(now - timedelta(days=104)),
# dummy_snapshot(now - timedelta(days=365 * 2)),
]
# checking that this function does not mutate the argument
assert snaps != pruned_snaps
assert snaps != snaps_to_keep
assert len(snaps) == old_len
quotas = copy(unlimited_quotas)
quotas = copy(zero_quotas)
quotas.weekly = 4
Backups.set_autobackup_quotas(quotas)
pruned_snaps = Backups._prune_snaps_with_quotas(snaps)
assert pruned_snaps == [
dummy_snapshot(now - timedelta(days=365 * 2)),
dummy_snapshot(now - timedelta(days=20)),
dummy_snapshot(now - timedelta(days=1)),
dummy_snapshot(now - timedelta(hours=2)),
dummy_snapshot(now - timedelta(minutes=5)),
snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
assert snaps_to_keep == [
dummy_snapshot(now),
# dummy_snapshot(now - timedelta(minutes=5)),
# dummy_snapshot(now - timedelta(hours=2)),
# dummy_snapshot(now - timedelta(hours=5)),
# dummy_snapshot(now - timedelta(days=1)),
# dummy_snapshot(now - timedelta(days=1, hours=2)),
# dummy_snapshot(now - timedelta(days=1, hours=3)),
# dummy_snapshot(now - timedelta(days=2)),
dummy_snapshot(now - timedelta(days=7)),
dummy_snapshot(now - timedelta(days=12)),
dummy_snapshot(now - timedelta(days=23)),
# dummy_snapshot(now - timedelta(days=28)),
# dummy_snapshot(now - timedelta(days=32)),
# dummy_snapshot(now - timedelta(days=47)),
# dummy_snapshot(now - timedelta(days=64)),
# dummy_snapshot(now - timedelta(days=84)),
# dummy_snapshot(now - timedelta(days=104)),
# dummy_snapshot(now - timedelta(days=365 * 2)),
]
quotas = copy(unlimited_quotas)
quotas = copy(zero_quotas)
quotas.monthly = 7
Backups.set_autobackup_quotas(quotas)
pruned_snaps = Backups._prune_snaps_with_quotas(snaps)
assert pruned_snaps == [
dummy_snapshot(now - timedelta(days=365 * 2)),
dummy_snapshot(now - timedelta(days=2)),
dummy_snapshot(now - timedelta(days=1, hours=3)),
dummy_snapshot(now - timedelta(days=1, hours=2)),
dummy_snapshot(now - timedelta(days=1)),
dummy_snapshot(now - timedelta(hours=2)),
dummy_snapshot(now - timedelta(minutes=5)),
snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
assert snaps_to_keep == [
dummy_snapshot(now),
# dummy_snapshot(now - timedelta(minutes=5)),
# dummy_snapshot(now - timedelta(hours=2)),
# dummy_snapshot(now - timedelta(hours=5)),
# dummy_snapshot(now - timedelta(days=1)),
# dummy_snapshot(now - timedelta(days=1, hours=2)),
# dummy_snapshot(now - timedelta(days=1, hours=3)),
# dummy_snapshot(now - timedelta(days=2)),
# dummy_snapshot(now - timedelta(days=7)),
# dummy_snapshot(now - timedelta(days=12)),
# dummy_snapshot(now - timedelta(days=23)),
dummy_snapshot(now - timedelta(days=28)),
# dummy_snapshot(now - timedelta(days=32)),
# dummy_snapshot(now - timedelta(days=47)),
dummy_snapshot(now - timedelta(days=64)),
# dummy_snapshot(now - timedelta(days=84)),
dummy_snapshot(now - timedelta(days=104)),
dummy_snapshot(now - timedelta(days=365 * 2)),
]
def test_autobackup_snapshots_pruning_yearly(backups):
    """With yearly=2 only the newest snapshot of the two newest years stays."""
    # Newest first
    snaps = [
        dummy_snapshot(datetime(year=2055, month=3, day=1)),
        dummy_snapshot(datetime(year=2055, month=2, day=1)),
        dummy_snapshot(datetime(year=2023, month=4, day=1)),
        dummy_snapshot(datetime(year=2023, month=3, day=1)),
        dummy_snapshot(datetime(year=2023, month=2, day=1)),
        dummy_snapshot(datetime(year=2021, month=2, day=1)),
    ]
    quotas = copy(zero_quotas)
    quotas.yearly = 2
    Backups.set_autobackup_quotas(quotas)

    snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
    assert snaps_to_keep == [
        dummy_snapshot(datetime(year=2055, month=3, day=1)),
        dummy_snapshot(datetime(year=2023, month=4, day=1)),
    ]
def test_autobackup_snapshots_pruning_bottleneck(backups):
    """
    All snapshots share one day, so each rule has a single period.

    Whichever rule is enabled, the newest snapshot is kept, and the
    oldest one is kept too because the rule still has a vacant slot.
    """
    now = datetime(year=2023, month=1, day=25, hour=10)
    # Newest first
    snaps = [
        dummy_snapshot(now),
        dummy_snapshot(now - timedelta(minutes=5)),
        dummy_snapshot(now - timedelta(hours=2)),
        dummy_snapshot(now - timedelta(hours=3)),
        dummy_snapshot(now - timedelta(hours=4)),
    ]

    def quotas_with(**limits) -> AutobackupQuotas:
        # Start from all-zero quotas and enable only the named rules.
        quotas = copy(zero_quotas)
        for name, limit in limits.items():
            setattr(quotas, name, limit)
        return quotas

    bottlenecks = [
        quotas_with(last=1, yearly=2),
        quotas_with(yearly=2),
        quotas_with(monthly=2),
        quotas_with(weekly=2),
        quotas_with(daily=2),
    ]

    for quota in bottlenecks:
        print(quota)
        Backups.set_autobackup_quotas(quota)

        snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
        assert snaps_to_keep == [
            dummy_snapshot(now),
            # If there is a vacant quota, we should keep the last snapshot even if it doesn't fit
            dummy_snapshot(now - timedelta(hours=4)),
        ]
def test_autobackup_snapshots_pruning_edgeweek(backups):
    """Weekly rotation across the 2022/2023 year boundary."""
    # jan 1 2023 is Sunday
    # Newest first
    snaps = [
        dummy_snapshot(datetime(year=2023, month=1, day=6)),
        dummy_snapshot(datetime(year=2023, month=1, day=1)),
        dummy_snapshot(datetime(year=2022, month=12, day=31)),
        dummy_snapshot(datetime(year=2022, month=12, day=30)),
    ]
    quotas = copy(zero_quotas)
    quotas.weekly = 2
    Backups.set_autobackup_quotas(quotas)

    snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
    assert snaps_to_keep == [
        dummy_snapshot(datetime(year=2023, month=1, day=6)),
        dummy_snapshot(datetime(year=2023, month=1, day=1)),
    ]
def test_autobackup_snapshots_pruning_big_gap(backups):
    """Weekly rotation keeps one snapshot per week even across a long gap."""
    january_6 = datetime(year=2023, month=1, day=6)
    january_2 = datetime(year=2023, month=1, day=2)
    october_31 = datetime(year=2022, month=10, day=31)
    october_30 = datetime(year=2022, month=10, day=30)
    # Newest first
    snaps = [
        dummy_snapshot(moment)
        for moment in (january_6, january_2, october_31, october_30)
    ]

    weekly_only = copy(zero_quotas)
    weekly_only.weekly = 2
    Backups.set_autobackup_quotas(weekly_only)

    snaps_to_keep = Backups._prune_snaps_with_quotas(snaps)
    assert snaps_to_keep == [
        dummy_snapshot(january_6),
        dummy_snapshot(october_31),
    ]
def test_too_many_auto(backups, dummy_service):
assert Backups.autobackup_quotas()
quota = copy(unlimited_quotas)
quota.total = 2
quota = copy(zero_quotas)
quota.last = 2
Backups.set_autobackup_quotas(quota)
assert Backups.autobackup_quotas().total == 2
assert Backups.autobackup_quotas().last == 2
snap = Backups.back_up(dummy_service, BackupReason.AUTO)
assert len(Backups.get_snapshots(dummy_service)) == 1
@ -509,7 +627,7 @@ def test_too_many_auto(backups, dummy_service):
assert snap3 in snaps
assert snap not in snaps
quota.total = -1
quota.last = -1
Backups.set_autobackup_quotas(quota)
snap4 = Backups.back_up(dummy_service, BackupReason.AUTO)
@ -518,7 +636,7 @@ def test_too_many_auto(backups, dummy_service):
assert snap4 in snaps
# Retroactivity
quota.total = 1
quota.last = 1
Backups.set_autobackup_quotas(quota)
snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1