selfprivacy-rest-api/tests/test_backup.py

717 lines
21 KiB
Python
Raw Normal View History

2023-02-03 17:04:35 +00:00
import pytest
import os
import os.path as path
2023-02-22 15:58:36 +00:00
from os import remove
from os import listdir
from os import urandom
2023-02-03 17:04:35 +00:00
from datetime import datetime, timedelta, timezone
import tempfile
from selfprivacy_api.utils.huey import huey
from selfprivacy_api.services.service import ServiceStatus
2023-04-10 13:22:33 +00:00
from selfprivacy_api.graphql.queries.providers import BackupProvider
from selfprivacy_api.graphql.common_types.backup import (
RestoreStrategy,
BackupReason,
)
from selfprivacy_api.jobs import Jobs, JobStatus
2023-07-05 13:13:30 +00:00
from selfprivacy_api.models.backup.snapshot import Snapshot
from selfprivacy_api.backup import Backups, BACKUP_PROVIDER_ENVS
2023-02-01 11:58:55 +00:00
import selfprivacy_api.backup.providers as providers
from selfprivacy_api.backup.providers import AbstractBackupProvider
from selfprivacy_api.backup.providers.backblaze import Backblaze
from selfprivacy_api.backup.providers.none import NoBackups
from selfprivacy_api.backup.providers import get_kind
from selfprivacy_api.backup.util import sync
from selfprivacy_api.backup.tasks import (
start_backup,
restore_snapshot,
reload_snapshot_cache,
)
2023-04-10 13:22:33 +00:00
from selfprivacy_api.backup.storage import Storage
2023-02-01 11:58:55 +00:00
2023-02-08 14:05:25 +00:00
REPO_NAME = "test_backup"
REPOFILE_NAME = "totallyunrelated"
def prepare_localfile_backups(temp_dir):
test_repo_path = path.join(temp_dir, REPOFILE_NAME)
assert not path.exists(test_repo_path)
Backups.set_localfile_repo(test_repo_path)
@pytest.fixture(scope="function")
def backups_local(tmpdir):
Backups.reset()
prepare_localfile_backups(tmpdir)
Jobs.reset()
Backups.init_repo()
@pytest.fixture(scope="function")
def backups(tmpdir):
"""
For those tests that are supposed to pass with
both local and cloud repos
"""
# Sometimes this is false. Idk why.
huey.immediate = True
assert huey.immediate is True
Backups.reset()
if BACKUP_PROVIDER_ENVS["kind"] in os.environ.keys():
Backups.set_provider_from_envs()
else:
prepare_localfile_backups(tmpdir)
Jobs.reset()
Backups.init_repo()
assert Backups.provider().location == str(tmpdir) + "/" + REPOFILE_NAME
yield
Backups.erase_repo()
@pytest.fixture()
2023-02-08 14:05:25 +00:00
def memory_backup() -> AbstractBackupProvider:
ProviderClass = providers.get_provider(BackupProvider.MEMORY)
assert ProviderClass is not None
memory_provider = ProviderClass(login="", key="")
assert memory_provider is not None
return memory_provider
@pytest.fixture()
def file_backup(tmpdir) -> AbstractBackupProvider:
test_repo_path = path.join(tmpdir, "test_repo")
ProviderClass = providers.get_provider(BackupProvider.FILE)
assert ProviderClass is not None
provider = ProviderClass(location=test_repo_path)
assert provider is not None
return provider
def test_reset_sets_to_none1():
Backups.reset()
provider = Backups.provider()
assert provider is not None
assert isinstance(provider, NoBackups)
def test_reset_sets_to_none2(backups):
# now with something set up first^^^
Backups.reset()
provider = Backups.provider()
assert provider is not None
assert isinstance(provider, NoBackups)
def test_setting_from_envs(tmpdir):
Backups.reset()
environment_stash = {}
if BACKUP_PROVIDER_ENVS["kind"] in os.environ.keys():
# we are running under special envs, stash them before rewriting them
for key in BACKUP_PROVIDER_ENVS.values():
environment_stash[key] = os.environ[key]
os.environ[BACKUP_PROVIDER_ENVS["kind"]] = "BACKBLAZE"
os.environ[BACKUP_PROVIDER_ENVS["login"]] = "ID"
os.environ[BACKUP_PROVIDER_ENVS["key"]] = "KEY"
os.environ[BACKUP_PROVIDER_ENVS["location"]] = "selfprivacy"
Backups.set_provider_from_envs()
provider = Backups.provider()
assert provider is not None
assert isinstance(provider, Backblaze)
assert provider.login == "ID"
assert provider.key == "KEY"
assert provider.location == "selfprivacy"
2023-03-10 14:14:41 +00:00
assert provider.backupper.account == "ID"
assert provider.backupper.key == "KEY"
if environment_stash != {}:
for key in BACKUP_PROVIDER_ENVS.values():
os.environ[key] = environment_stash[key]
else:
for key in BACKUP_PROVIDER_ENVS.values():
del os.environ[key]
2023-03-10 14:14:41 +00:00
2023-02-01 11:58:55 +00:00
def test_select_backend():
provider = providers.get_provider(BackupProvider.BACKBLAZE)
assert provider is not None
assert provider == Backblaze
2023-02-03 17:04:35 +00:00
def test_file_backend_init(file_backup):
file_backup.backupper.init()
def test_reinit_after_purge(backups):
assert Backups.is_initted() is True
Backups.erase_repo()
assert Backups.is_initted() is False
with pytest.raises(ValueError):
Backups.force_snapshot_cache_reload()
Backups.init_repo()
assert Backups.is_initted() is True
assert len(Backups.get_all_snapshots()) == 0
def test_backup_service(dummy_service, backups):
id = dummy_service.get_id()
assert_job_finished(f"services.{id}.backup", count=0)
assert Backups.get_last_backed_up(dummy_service) is None
Backups.back_up(dummy_service)
2023-02-13 11:16:35 +00:00
now = datetime.now(timezone.utc)
date = Backups.get_last_backed_up(dummy_service)
assert date is not None
assert now > date
assert now - date < timedelta(minutes=1)
assert_job_finished(f"services.{id}.backup", count=1)
2023-02-13 11:16:35 +00:00
def test_no_repo(memory_backup):
with pytest.raises(ValueError):
assert memory_backup.backupper.get_snapshots() == []
def test_one_snapshot(backups, dummy_service):
Backups.back_up(dummy_service)
snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1
snap = snaps[0]
assert snap.service_name == dummy_service.get_id()
2023-02-22 15:58:36 +00:00
def test_backup_returns_snapshot(backups, dummy_service):
service_folders = dummy_service.get_folders()
provider = Backups.provider()
name = dummy_service.get_id()
snapshot = provider.backupper.start_backup(service_folders, name)
assert snapshot.id is not None
snapshots = provider.backupper.get_snapshots()
assert snapshots != []
assert len(snapshot.id) == len(snapshots[0].id)
assert Backups.get_snapshot_by_id(snapshot.id) is not None
assert snapshot.service_name == name
assert snapshot.created_at is not None
2023-08-21 11:30:35 +00:00
assert snapshot.reason == BackupReason.EXPLICIT
def test_backup_reasons(backups, dummy_service):
snap = Backups.back_up(dummy_service, BackupReason.AUTO)
assert snap.reason == BackupReason.AUTO
Backups.force_snapshot_cache_reload()
snaps = Backups.get_snapshots(dummy_service)
assert snaps[0].reason == BackupReason.AUTO
2023-07-03 12:54:43 +00:00
def folder_files(folder):
return [
path.join(folder, filename)
for filename in listdir(folder)
if filename is not None
]
2023-04-19 15:09:06 +00:00
def service_files(service):
result = []
for service_folder in service.get_folders():
2023-07-03 12:54:43 +00:00
result.extend(folder_files(service_folder))
2023-04-19 15:09:06 +00:00
return result
2023-02-22 15:58:36 +00:00
def test_restore(backups, dummy_service):
2023-04-19 15:09:06 +00:00
paths_to_nuke = service_files(dummy_service)
contents = []
2023-04-19 15:09:06 +00:00
for service_file in paths_to_nuke:
with open(service_file, "r") as file:
contents.append(file.read())
2023-02-22 15:58:36 +00:00
Backups.back_up(dummy_service)
snap = Backups.get_snapshots(dummy_service)[0]
2023-02-22 15:58:36 +00:00
assert snap is not None
for p in paths_to_nuke:
assert path.exists(p)
remove(p)
assert not path.exists(p)
2023-02-22 15:58:36 +00:00
Backups._restore_service_from_snapshot(dummy_service, snap.id)
for p, content in zip(paths_to_nuke, contents):
assert path.exists(p)
with open(p, "r") as file:
assert file.read() == content
2023-02-22 18:48:08 +00:00
def test_sizing(backups, dummy_service):
Backups.back_up(dummy_service)
snap = Backups.get_snapshots(dummy_service)[0]
size = Backups.snapshot_restored_size(snap.id)
2023-02-22 18:48:08 +00:00
assert size is not None
assert size > 0
def test_init_tracking(backups, tmpdir):
assert Backups.is_initted() is True
Backups.reset()
assert Backups.is_initted() is False
separate_dir = tmpdir / "out_of_the_way"
prepare_localfile_backups(separate_dir)
Backups.init_repo()
2023-03-14 00:39:15 +00:00
assert Backups.is_initted() is True
2023-03-29 11:45:52 +00:00
def finished_jobs():
return [job for job in Jobs.get_jobs() if job.status is JobStatus.FINISHED]
def assert_job_finished(job_type, count):
finished_types = [job.type_id for job in finished_jobs()]
assert finished_types.count(job_type) == count
def assert_job_has_run(job_type):
job = [job for job in finished_jobs() if job.type_id == job_type][0]
assert JobStatus.RUNNING in Jobs.status_updates(job)
def job_progress_updates(job_type):
job = [job for job in finished_jobs() if job.type_id == job_type][0]
return Jobs.progress_updates(job)
def assert_job_had_progress(job_type):
assert len(job_progress_updates(job_type)) > 0
def make_large_file(path: str, bytes: int):
with open(path, "wb") as file:
file.write(urandom(bytes))
def test_snapshots_by_id(backups, dummy_service):
snap1 = Backups.back_up(dummy_service)
snap2 = Backups.back_up(dummy_service)
snap3 = Backups.back_up(dummy_service)
assert snap2.id is not None
assert snap2.id != ""
assert len(Backups.get_snapshots(dummy_service)) == 3
assert Backups.get_snapshot_by_id(snap2.id).id == snap2.id
@pytest.fixture(params=["instant_server_stop", "delayed_server_stop"])
def simulated_service_stopping_delay(request) -> float:
if request.param == "instant_server_stop":
return 0.0
else:
return 0.3
def test_backup_service_task(backups, dummy_service, simulated_service_stopping_delay):
dummy_service.set_delay(simulated_service_stopping_delay)
handle = start_backup(dummy_service.get_id())
2023-03-29 11:45:52 +00:00
handle(blocking=True)
snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1
id = dummy_service.get_id()
job_type_id = f"services.{id}.backup"
assert_job_finished(job_type_id, count=1)
assert_job_has_run(job_type_id)
assert_job_had_progress(job_type_id)
2023-07-05 13:13:30 +00:00
def test_forget_snapshot(backups, dummy_service):
snap1 = Backups.back_up(dummy_service)
snap2 = Backups.back_up(dummy_service)
assert len(Backups.get_snapshots(dummy_service)) == 2
Backups.forget_snapshot(snap2)
assert len(Backups.get_snapshots(dummy_service)) == 1
Backups.force_snapshot_cache_reload()
assert len(Backups.get_snapshots(dummy_service)) == 1
assert Backups.get_snapshots(dummy_service)[0].id == snap1.id
Backups.forget_snapshot(snap1)
assert len(Backups.get_snapshots(dummy_service)) == 0
def test_forget_nonexistent_snapshot(backups, dummy_service):
bogus = Snapshot(
id="gibberjibber",
service_name="nohoho",
created_at=datetime.now(timezone.utc),
reason=BackupReason.EXPLICIT,
2023-07-05 13:13:30 +00:00
)
with pytest.raises(ValueError):
Backups.forget_snapshot(bogus)
def test_backup_larger_file(backups, dummy_service):
dir = path.join(dummy_service.get_folders()[0], "LARGEFILE")
mega = 2**20
make_large_file(dir, 100 * mega)
handle = start_backup(dummy_service.get_id())
handle(blocking=True)
# results will be slightly different on different machines. if someone has troubles with it on their machine, consider dropping this test.
id = dummy_service.get_id()
job_type_id = f"services.{id}.backup"
assert_job_finished(job_type_id, count=1)
assert_job_has_run(job_type_id)
updates = job_progress_updates(job_type_id)
assert len(updates) > 3
2023-06-29 10:44:29 +00:00
assert updates[int((len(updates) - 1) / 2.0)] > 10
2023-07-03 15:28:12 +00:00
# clean up a bit
remove(dir)
@pytest.fixture(params=["verify", "inplace"])
def restore_strategy(request) -> RestoreStrategy:
if request.param == "verify":
return RestoreStrategy.DOWNLOAD_VERIFY_OVERWRITE
else:
return RestoreStrategy.INPLACE
@pytest.fixture(params=["failed", "healthy"])
def failed(request) -> bool:
if request.param == "failed":
return True
return False
def test_restore_snapshot_task(
backups, dummy_service, restore_strategy, simulated_service_stopping_delay, failed
):
dummy_service.set_delay(simulated_service_stopping_delay)
if failed:
dummy_service.set_status(ServiceStatus.FAILED)
2023-04-19 15:09:06 +00:00
Backups.back_up(dummy_service)
snaps = Backups.get_snapshots(dummy_service)
assert len(snaps) == 1
paths_to_nuke = service_files(dummy_service)
contents = []
for service_file in paths_to_nuke:
with open(service_file, "r") as file:
contents.append(file.read())
for p in paths_to_nuke:
remove(p)
handle = restore_snapshot(snaps[0], restore_strategy)
2023-04-19 15:09:06 +00:00
handle(blocking=True)
for p, content in zip(paths_to_nuke, contents):
assert path.exists(p)
with open(p, "r") as file:
assert file.read() == content
snaps = Backups.get_snapshots(dummy_service)
if restore_strategy == RestoreStrategy.INPLACE:
assert len(snaps) == 2
2023-08-21 11:30:35 +00:00
reasons = [snap.reason for snap in snaps]
assert BackupReason.PRE_RESTORE in reasons
else:
assert len(snaps) == 1
2023-04-19 15:09:06 +00:00
def test_backup_unbackuppable(backups, dummy_service):
dummy_service.set_backuppable(False)
assert dummy_service.can_be_backed_up() is False
with pytest.raises(ValueError):
Backups.back_up(dummy_service)
2023-04-10 13:22:33 +00:00
# Storage
def test_snapshots_caching(backups, dummy_service):
Backups.back_up(dummy_service)
# we test indirectly that we do redis calls instead of shell calls
start = datetime.now()
for i in range(10):
snapshots = Backups.get_snapshots(dummy_service)
assert len(snapshots) == 1
assert datetime.now() - start < timedelta(seconds=0.5)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
snap_to_uncache = cached_snapshots[0]
Storage.delete_cached_snapshot(snap_to_uncache)
2023-04-10 13:22:33 +00:00
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 0
# We do not assume that no snapshots means we need to reload the cache
2023-04-10 13:22:33 +00:00
snapshots = Backups.get_snapshots(dummy_service)
assert len(snapshots) == 0
# No cache reload happened
2023-04-10 13:22:33 +00:00
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 0
2023-04-10 13:22:33 +00:00
# Storage
def test_snapshot_cache_autoreloads(backups, dummy_service):
Backups.back_up(dummy_service)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
snap_to_uncache = cached_snapshots[0]
Storage.delete_cached_snapshot(snap_to_uncache)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 0
# When we create a snapshot we do reload cache
Backups.back_up(dummy_service)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 2
assert snap_to_uncache in cached_snapshots
Storage.delete_cached_snapshot(snap_to_uncache)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
# When we try to delete a snapshot we cannot find in cache, it is ok and we do reload cache
Backups.forget_snapshot(snap_to_uncache)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
assert snap_to_uncache not in cached_snapshots
def lowlevel_forget(snapshot_id):
Backups.provider().backupper.forget_snapshot(snapshot_id)
# Storage
def test_snapshots_cache_invalidation(backups, dummy_service):
Backups.back_up(dummy_service)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
Storage.invalidate_snapshot_storage()
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 0
Backups.force_snapshot_cache_reload()
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
snap = cached_snapshots[0]
lowlevel_forget(snap.id)
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 1
Backups.force_snapshot_cache_reload()
cached_snapshots = Storage.get_cached_snapshots()
assert len(cached_snapshots) == 0
2023-04-10 13:22:33 +00:00
# Storage
def test_init_tracking_caching(backups, raw_dummy_service):
assert Storage.has_init_mark() is True
Backups.reset()
assert Storage.has_init_mark() is False
2023-04-10 13:22:33 +00:00
Storage.mark_as_init()
2023-04-10 13:22:33 +00:00
assert Storage.has_init_mark() is True
assert Backups.is_initted() is True
2023-04-10 13:22:33 +00:00
# Storage
def test_init_tracking_caching2(backups, tmpdir):
assert Storage.has_init_mark() is True
Backups.reset()
assert Storage.has_init_mark() is False
separate_dir = tmpdir / "out_of_the_way"
prepare_localfile_backups(separate_dir)
assert Storage.has_init_mark() is False
2023-04-10 13:22:33 +00:00
Backups.init_repo()
2023-04-10 13:22:33 +00:00
assert Storage.has_init_mark() is True
2023-04-10 13:22:33 +00:00
# Storage
def test_provider_storage(backups):
test_login = "ID"
test_key = "KEY"
test_location = "selprivacy_bin"
2023-04-10 13:22:33 +00:00
old_provider = Backups.provider()
assert old_provider is not None
2023-04-10 13:22:33 +00:00
assert not isinstance(old_provider, Backblaze)
assert old_provider.login != test_login
assert old_provider.key != test_key
assert old_provider.location != test_location
test_provider = Backups._construct_provider(
kind=BackupProvider.BACKBLAZE, login="ID", key=test_key, location=test_location
)
assert isinstance(test_provider, Backblaze)
assert get_kind(test_provider) == "BACKBLAZE"
assert test_provider.login == test_login
assert test_provider.key == test_key
assert test_provider.location == test_location
Storage.store_provider(test_provider)
restored_provider_model = Storage.load_provider()
assert restored_provider_model.kind == "BACKBLAZE"
assert restored_provider_model.login == test_login
assert restored_provider_model.key == test_key
assert restored_provider_model.location == test_location
2023-04-10 13:22:33 +00:00
restored_provider = Backups._load_provider_redis()
2023-04-10 13:22:33 +00:00
assert isinstance(restored_provider, Backblaze)
assert restored_provider.login == test_login
assert restored_provider.key == test_key
assert restored_provider.location == test_location
# Revert our mess so we can teardown ok
Storage.store_provider(old_provider)
2023-04-12 17:18:12 +00:00
def test_sync(dummy_service):
src = dummy_service.get_folders()[0]
dst = dummy_service.get_folders()[1]
old_files_src = set(listdir(src))
old_files_dst = set(listdir(dst))
assert old_files_src != old_files_dst
sync(src, dst)
new_files_src = set(listdir(src))
new_files_dst = set(listdir(dst))
assert new_files_src == old_files_src
assert new_files_dst == new_files_src
def test_sync_nonexistent_src(dummy_service):
src = "/var/lib/nonexistentFluffyBunniesOfUnix"
dst = dummy_service.get_folders()[1]
with pytest.raises(ValueError):
sync(src, dst)
2023-07-03 15:28:12 +00:00
def test_move_blocks_backups(backups, dummy_service, restore_strategy):
snap = Backups.back_up(dummy_service)
job = Jobs.add(
type_id=f"services.{dummy_service.get_id()}.move",
name="Move Dummy",
description=f"Moving Dummy data to the Rainbow Land",
status=JobStatus.RUNNING,
)
with pytest.raises(ValueError):
Backups.back_up(dummy_service)
with pytest.raises(ValueError):
Backups.restore_snapshot(snap, restore_strategy)
def test_double_lock_unlock(backups, dummy_service):
# notice that introducing stale locks is only safe for other tests if we erase repo in between
# which we do at the time of writing this test
Backups.provider().backupper.lock()
with pytest.raises(ValueError):
Backups.provider().backupper.lock()
Backups.provider().backupper.unlock()
Backups.provider().backupper.lock()
Backups.provider().backupper.unlock()
Backups.provider().backupper.unlock()
def test_operations_while_locked(backups, dummy_service):
# Stale lock prevention test
# consider making it fully at the level of backupper?
# because this is where prevention lives?
# Backups singleton is here only so that we can run this against B2, S3 and whatever
# But maybe it is not necessary (if restic treats them uniformly enough)
Backups.provider().backupper.lock()
snap = Backups.back_up(dummy_service)
assert snap is not None
Backups.provider().backupper.lock()
# using lowlevel to make sure no caching interferes
assert Backups.provider().backupper.is_initted() is True
Backups.provider().backupper.lock()
assert Backups.snapshot_restored_size(snap.id) > 0
Backups.provider().backupper.lock()
Backups.restore_snapshot(snap)
Backups.provider().backupper.lock()
Backups.forget_snapshot(snap)
Backups.provider().backupper.lock()
assert Backups.provider().backupper.get_snapshots() == []
# check that no locks were left
Backups.provider().backupper.lock()
Backups.provider().backupper.unlock()
# a paranoid check to weed out problems with tempdirs that are not dependent on us
def test_tempfile():
with tempfile.TemporaryDirectory() as temp:
assert path.exists(temp)
assert not path.exists(temp)
# Storage
def test_cache_invalidaton_task(backups, dummy_service):
Backups.back_up(dummy_service)
assert len(Storage.get_cached_snapshots()) == 1
# Does not trigger resync
Storage.invalidate_snapshot_storage()
assert Storage.get_cached_snapshots() == []
reload_snapshot_cache()
assert len(Storage.get_cached_snapshots()) == 1