feature(backups): actual finegrained quotas

This commit is contained in:
Houkime 2023-08-28 17:02:45 +00:00
parent b2c7e8b73a
commit 9207f5385c
4 changed files with 340 additions and 29 deletions

View file

@ -23,7 +23,18 @@ from selfprivacy_api.jobs import Jobs, JobStatus, Job
from selfprivacy_api.graphql.queries.providers import ( from selfprivacy_api.graphql.queries.providers import (
BackupProvider as BackupProviderEnum, BackupProvider as BackupProviderEnum,
) )
from selfprivacy_api.graphql.common_types.backup import RestoreStrategy, BackupReason from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
AutobackupQuotas,
)
from selfprivacy_api.backup.time import (
same_day,
same_month,
same_week,
same_year,
same_lifetime_of_the_universe,
)
from selfprivacy_api.models.backup.snapshot import Snapshot from selfprivacy_api.models.backup.snapshot import Snapshot
@ -303,20 +314,88 @@ class Backups:
if snap.reason == BackupReason.AUTO if snap.reason == BackupReason.AUTO
] ]
@staticmethod
def add_snap_but_with_quotas(
    new_snap: Snapshot, snaps: List[Snapshot], quotas: AutobackupQuotas
) -> None:
    """
    Append new_snap to snaps, then evict the oldest snapshots of every
    period that ends up over its quota.

    The list is modified in place. Periods are checked from the narrowest
    (day) to the widest (everything), each one relative to the creation
    time of new_snap. A quota <= 0 leaves that period unlimited.
    """
    period_quotas = (
        (same_day, quotas.daily),
        (same_week, quotas.weekly),
        (same_month, quotas.monthly),
        (same_year, quotas.yearly),
        (same_lifetime_of_the_universe, quotas.total),
    )

    snaps.append(new_snap)

    for in_same_period, limit in period_quotas:
        if limit <= 0:
            continue

        # Everything sharing the period with the new snapshot, oldest first
        peers = sorted(
            (
                candidate
                for candidate in snaps
                if in_same_period(candidate.created_at, new_snap.created_at)
            ),
            key=lambda s: s.created_at,
        )
        excess = len(peers) - limit
        if excess <= 0:
            continue

        for stale in peers[:excess]:
            snaps.remove(stale)
@staticmethod
def _prune_snaps_with_quotas(snapshots: List[Snapshot]) -> List[Snapshot]:
    """
    Return the snapshots that survive the currently stored quotas.

    Kept as a separate function so the pruning logic can be tested on its
    own. The argument is left untouched: snapshots are replayed
    oldest-first into a fresh list, with quotas applied on every insertion.
    """
    chronological = sorted(snapshots, key=lambda s: s.created_at)
    quotas = Backups.autobackup_quotas()

    survivors: List[Snapshot] = []
    for snapshot in chronological:
        Backups.add_snap_but_with_quotas(snapshot, survivors, quotas)
    return survivors
@staticmethod @staticmethod
def _prune_auto_snaps(service) -> None: def _prune_auto_snaps(service) -> None:
max = Backups.max_auto_snapshots() # Not very testable by itself, so most testing is going on Backups._prune_snaps_with_quotas
if max == -1: # We can still test total limits and, say, daily limits
return
auto_snaps = Backups._auto_snaps(service) auto_snaps = Backups._auto_snaps(service)
if len(auto_snaps) > max: new_snaplist = Backups._prune_snaps_with_quotas(auto_snaps)
n_to_kill = len(auto_snaps) - max
sorted_snaps = sorted(auto_snaps, key=lambda s: s.created_at) # TODO: Can be optimized since there is forgetting of an array in one restic op
snaps_to_kill = sorted_snaps[:n_to_kill] # but most of the time this will be only one snap to forget.
for snap in snaps_to_kill: for snap in auto_snaps:
if snap not in new_snaplist:
Backups.forget_snapshot(snap) Backups.forget_snapshot(snap)
@staticmethod
def _standardize_quotas(i: int) -> int:
if i <= 0:
i = -1
return i
@staticmethod
def autobackup_quotas() -> AutobackupQuotas:
    """Fetch the stored autobackup quotas; any value <= 0 means unlimited."""
    stored_quotas = Storage.autobackup_quotas()
    return stored_quotas
@staticmethod
def set_autobackup_quotas(quotas: AutobackupQuotas) -> None:
    """
    Persist the autobackup quotas.

    Every field goes through _standardize_quotas first, so any value <= 0
    (unlimited) is stored as -1.
    """
    normalize = Backups._standardize_quotas
    normalized_quotas = AutobackupQuotas(
        daily=normalize(quotas.daily),
        weekly=normalize(quotas.weekly),
        monthly=normalize(quotas.monthly),
        yearly=normalize(quotas.yearly),
        total=normalize(quotas.total),
    )
    Storage.set_autobackup_quotas(normalized_quotas)
@staticmethod @staticmethod
def set_max_auto_snapshots(value: int) -> None: def set_max_auto_snapshots(value: int) -> None:
"""everything <=0 means unlimited""" """everything <=0 means unlimited"""

View file

@ -6,6 +6,10 @@ from datetime import datetime
from selfprivacy_api.models.backup.snapshot import Snapshot from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.models.backup.provider import BackupProviderModel from selfprivacy_api.models.backup.provider import BackupProviderModel
from selfprivacy_api.graphql.common_types.backup import (
AutobackupQuotas,
_AutobackupQuotas,
)
from selfprivacy_api.utils.redis_pool import RedisPool from selfprivacy_api.utils.redis_pool import RedisPool
from selfprivacy_api.utils.redis_model_storage import ( from selfprivacy_api.utils.redis_model_storage import (
@ -27,6 +31,7 @@ REDIS_PROVIDER_KEY = "backups:provider"
REDIS_AUTOBACKUP_PERIOD_KEY = "backups:autobackup_period" REDIS_AUTOBACKUP_PERIOD_KEY = "backups:autobackup_period"
REDIS_AUTOBACKUP_MAX_KEY = "backups:autobackup_cap" REDIS_AUTOBACKUP_MAX_KEY = "backups:autobackup_cap"
REDIS_AUTOBACKUP_QUOTAS_KEY = "backups:autobackup_quotas_key"
redis = RedisPool().get_connection() redis = RedisPool().get_connection()
@ -41,6 +46,7 @@ class Storage:
redis.delete(REDIS_AUTOBACKUP_PERIOD_KEY) redis.delete(REDIS_AUTOBACKUP_PERIOD_KEY)
redis.delete(REDIS_INITTED_CACHE) redis.delete(REDIS_INITTED_CACHE)
redis.delete(REDIS_AUTOBACKUP_MAX_KEY) redis.delete(REDIS_AUTOBACKUP_MAX_KEY)
redis.delete(REDIS_AUTOBACKUP_QUOTAS_KEY)
prefixes_to_clean = [ prefixes_to_clean = [
REDIS_SNAPSHOTS_PREFIX, REDIS_SNAPSHOTS_PREFIX,
@ -178,6 +184,26 @@ class Storage:
"""Marks the repository as initialized""" """Marks the repository as initialized"""
redis.delete(REDIS_INITTED_CACHE) redis.delete(REDIS_INITTED_CACHE)
@staticmethod
def set_autobackup_quotas(quotas: AutobackupQuotas) -> None:
    """Save the quotas under REDIS_AUTOBACKUP_QUOTAS_KEY via store_model_as_hash."""
    quotas_model = quotas.to_pydantic()
    store_model_as_hash(redis, REDIS_AUTOBACKUP_QUOTAS_KEY, quotas_model)
@staticmethod
def autobackup_quotas() -> AutobackupQuotas:
    """
    Load the quotas stored under REDIS_AUTOBACKUP_QUOTAS_KEY.

    When nothing is stored, every quota is reported as -1.
    """
    stored_model = hash_as_model(
        redis, REDIS_AUTOBACKUP_QUOTAS_KEY, _AutobackupQuotas
    )
    if stored_model is not None:
        return AutobackupQuotas.from_pydantic(stored_model)

    return AutobackupQuotas(
        daily=-1,
        weekly=-1,
        monthly=-1,
        yearly=-1,
        total=-1,
    )
@staticmethod @staticmethod
def set_max_auto_snapshots(value: int): def set_max_auto_snapshots(value: int):
redis.set(REDIS_AUTOBACKUP_MAX_KEY, value) redis.set(REDIS_AUTOBACKUP_MAX_KEY, value)

View file

@ -2,6 +2,7 @@
# pylint: disable=too-few-public-methods # pylint: disable=too-few-public-methods
import strawberry import strawberry
from enum import Enum from enum import Enum
from pydantic import BaseModel
@strawberry.enum @strawberry.enum
@ -15,3 +16,16 @@ class BackupReason(Enum):
EXPLICIT = "EXPLICIT" EXPLICIT = "EXPLICIT"
AUTO = "AUTO" AUTO = "AUTO"
PRE_RESTORE = "PRE_RESTORE" PRE_RESTORE = "PRE_RESTORE"
# Pydantic model holding the autobackup quotas; the strawberry
# AutobackupQuotas type is generated from it.
# The backup code treats any value <= 0 as "unlimited".
class _AutobackupQuotas(BaseModel):
    # quota for snapshots made within the same day
    daily: int
    # quota for snapshots made within the same week
    weekly: int
    # quota for snapshots made within the same month
    monthly: int
    # quota for snapshots made within the same year
    yearly: int
    # quota for all snapshots regardless of date
    total: int
# Strawberry type mirroring _AutobackupQuotas: all_fields=True makes
# strawberry take every field from the pydantic model, so the class body
# stays empty.
@strawberry.experimental.pydantic.type(model=_AutobackupQuotas, all_fields=True)
class AutobackupQuotas:
    pass

View file

@ -5,8 +5,12 @@ from os import makedirs
from os import remove from os import remove
from os import listdir from os import listdir
from os import urandom from os import urandom
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone, date, time
from subprocess import Popen from subprocess import Popen
from copy import copy
import secrets
import selfprivacy_api.services as services import selfprivacy_api.services as services
from selfprivacy_api.services import Service, get_all_services from selfprivacy_api.services import Service, get_all_services
@ -19,6 +23,8 @@ from selfprivacy_api.jobs import Jobs, JobStatus
from selfprivacy_api.models.backup.snapshot import Snapshot from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.graphql.common_types.backup import AutobackupQuotas
from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS
import selfprivacy_api.backup.providers as providers import selfprivacy_api.backup.providers as providers
from selfprivacy_api.backup.providers import AbstractBackupProvider from selfprivacy_api.backup.providers import AbstractBackupProvider
@ -298,29 +304,215 @@ def test_backup_reasons(backups, dummy_service):
assert snaps[0].reason == BackupReason.AUTO assert snaps[0].reason == BackupReason.AUTO
def test_too_many_auto(backups, dummy_service): unlimited_quotas = AutobackupQuotas(
assert Backups.max_auto_snapshots() == -1 daily=-1,
Backups.set_max_auto_snapshots(2) weekly=-1,
assert Backups.max_auto_snapshots() == 2 monthly=-1,
yearly=-1,
total=-1,
)
snap = Backups.back_up(dummy_service, BackupReason.AUTO)
assert len(Backups.get_snapshots(dummy_service)) == 1
snap2 = Backups.back_up(dummy_service, BackupReason.AUTO)
assert len(Backups.get_snapshots(dummy_service)) == 2
snap3 = Backups.back_up(dummy_service, BackupReason.AUTO)
assert len(Backups.get_snapshots(dummy_service)) == 2
snaps = Backups.get_snapshots(dummy_service) def test_get_empty_quotas(backups):
quotas = Backups.autobackup_quotas()
assert quotas is not None
assert quotas == unlimited_quotas
assert snap2 in snaps
assert snap3 in snaps
assert snap not in snaps
Backups.set_max_auto_snapshots(-1) def test_set_quotas(backups):
snap4 = Backups.back_up(dummy_service, BackupReason.AUTO) quotas = AutobackupQuotas(
snaps = Backups.get_snapshots(dummy_service) daily=2343,
assert len(snaps) == 3 weekly=343,
assert snap4 in snaps monthly=0,
yearly=-34556,
total=563,
)
Backups.set_autobackup_quotas(quotas)
assert Backups.autobackup_quotas() == AutobackupQuotas(
daily=2343,
weekly=343,
monthly=-1,
yearly=-1,
total=563,
)
def dummy_snapshot(date: datetime):
    """Build a throwaway Snapshot created at `date`, with an id derived from the date's hash."""
    snapshot_id = str(hash(date))
    return Snapshot(
        id=snapshot_id,
        service_name="someservice",
        created_at=date,
        reason=BackupReason.EXPLICIT,
    )
def test_autobackup_snapshots_pruning(backups):
    """Apply daily, weekly and monthly quotas one at a time to a fixed timeline."""
    # Wednesday, fourth week
    now = datetime(year=2023, month=1, day=25, hour=10)

    def ago(**delta):
        # Fresh snapshot created `delta` before `now`
        return dummy_snapshot(now - timedelta(**delta))

    snaps = [
        ago(days=365 * 2),
        ago(days=20),
        ago(days=2),
        ago(days=1, hours=3),
        ago(days=1, hours=2),
        ago(days=1),
        ago(hours=2),
        ago(minutes=5),
        dummy_snapshot(now),
    ]
    old_len = len(snaps)

    def prune_with(**limits):
        # Start from unlimited quotas, override the given periods, then prune
        quotas = copy(unlimited_quotas)
        for period, limit in limits.items():
            setattr(quotas, period, limit)
        Backups.set_autobackup_quotas(quotas)
        return Backups._prune_snaps_with_quotas(snaps)

    # Unlimited quotas keep everything
    assert prune_with() == snaps

    pruned_snaps = prune_with(daily=2)
    assert pruned_snaps == [
        ago(days=365 * 2),
        ago(days=20),
        ago(days=2),
        ago(days=1, hours=2),
        ago(days=1),
        ago(minutes=5),
        dummy_snapshot(now),
    ]

    # checking that this function does not mutate the argument
    assert snaps != pruned_snaps
    assert len(snaps) == old_len

    pruned_snaps = prune_with(weekly=4)
    assert pruned_snaps == [
        ago(days=365 * 2),
        ago(days=20),
        ago(days=1),
        ago(hours=2),
        ago(minutes=5),
        dummy_snapshot(now),
    ]

    pruned_snaps = prune_with(monthly=7)
    assert pruned_snaps == [
        ago(days=365 * 2),
        ago(days=2),
        ago(days=1, hours=3),
        ago(days=1, hours=2),
        ago(days=1),
        ago(hours=2),
        ago(minutes=5),
        dummy_snapshot(now),
    ]
def test_autobackup_snapshots_pruning_yearly(backups):
    """A yearly quota of 2 drops only the oldest of the three 2023 snapshots."""

    def first_of(year, month):
        # Fresh snapshot created on the first day of the given month
        return dummy_snapshot(datetime(year=year, month=month, day=1))

    snaps = [
        first_of(2023, 2),
        first_of(2023, 3),
        first_of(2023, 4),
        first_of(2055, 3),
    ]

    yearly_quotas = copy(unlimited_quotas)
    yearly_quotas.yearly = 2
    Backups.set_autobackup_quotas(yearly_quotas)

    survivors = Backups._prune_snaps_with_quotas(snaps)
    assert survivors == [
        first_of(2023, 3),
        first_of(2023, 4),
        first_of(2055, 3),
    ]
def test_autobackup_snapshots_pruning_bottleneck(backups):
    """A quota of 2 on any single period keeps only the two newest same-day snapshots."""
    now = datetime(year=2023, month=1, day=25, hour=10)
    offsets = [
        timedelta(hours=4),
        timedelta(hours=3),
        timedelta(hours=2),
        timedelta(minutes=5),
    ]
    snaps = [dummy_snapshot(now - offset) for offset in offsets]
    snaps.append(dummy_snapshot(now))

    # One quota set per period, each limiting only that period to 2
    single_period_quotas = []
    for period in ("total", "yearly", "monthly", "weekly", "daily"):
        limited = copy(unlimited_quotas)
        setattr(limited, period, 2)
        single_period_quotas.append(limited)

    for quota in single_period_quotas:
        Backups.set_autobackup_quotas(quota)
        survivors = Backups._prune_snaps_with_quotas(snaps)
        assert survivors == [
            dummy_snapshot(now - timedelta(minutes=5)),
            dummy_snapshot(now),
        ]
def test_autobackup_snapshots_pruning_edgeweek(backups):
    """A weekly quota of 2 drops only the Dec 30 snapshot on a timeline crossing New Year."""

    def on(year, month, day):
        # Fresh snapshot created at midnight of the given date
        return dummy_snapshot(datetime(year=year, month=month, day=day))

    # jan 1 2023 is Sunday
    snaps = [
        on(2022, 12, 30),
        on(2022, 12, 31),
        on(2023, 1, 1),
        on(2023, 1, 6),
    ]

    weekly_quotas = copy(unlimited_quotas)
    weekly_quotas.weekly = 2
    Backups.set_autobackup_quotas(weekly_quotas)

    survivors = Backups._prune_snaps_with_quotas(snaps)
    assert survivors == [
        on(2022, 12, 31),
        on(2023, 1, 1),
        on(2023, 1, 6),
    ]
# def test_too_many_auto(backups, dummy_service):
# assert Backups.autobackup_quotas()
# Backups.set_max_auto_snapshots(2)
# assert Backups.max_auto_snapshots() == 2
# snap = Backups.back_up(dummy_service, BackupReason.AUTO)
# assert len(Backups.get_snapshots(dummy_service)) == 1
# snap2 = Backups.back_up(dummy_service, BackupReason.AUTO)
# assert len(Backups.get_snapshots(dummy_service)) == 2
# snap3 = Backups.back_up(dummy_service, BackupReason.AUTO)
# assert len(Backups.get_snapshots(dummy_service)) == 2
# snaps = Backups.get_snapshots(dummy_service)
# assert snap2 in snaps
# assert snap3 in snaps
# assert snap not in snaps
# Backups.set_max_auto_snapshots(-1)
# snap4 = Backups.back_up(dummy_service, BackupReason.AUTO)
# snaps = Backups.get_snapshots(dummy_service)
# assert len(snaps) == 3
# assert snap4 in snaps
def folder_files(folder): def folder_files(folder):