mirror of
https://git.selfprivacy.org/SelfPrivacy/selfprivacy-rest-api.git
synced 2024-11-18 16:39:13 +00:00
Merge pull request 'Fixes for restoring potentially failed services' (#57) from fix-restores-wrt-service-status into master
Reviewed-on: https://git.selfprivacy.org/SelfPrivacy/selfprivacy-rest-api/pulls/57
This commit is contained in:
commit
5fd4daa3e7
|
@ -283,7 +283,7 @@ class Backups:
|
|||
Backups._store_last_snapshot(tag, snapshot)
|
||||
service.post_restore()
|
||||
except Exception as error:
|
||||
Jobs.update(job, status=JobStatus.ERROR)
|
||||
Jobs.update(job, status=JobStatus.ERROR, status_text=str(error))
|
||||
raise error
|
||||
|
||||
Jobs.update(job, status=JobStatus.FINISHED)
|
||||
|
@ -306,9 +306,14 @@ class Backups:
|
|||
snapshot: Snapshot,
|
||||
job: Job,
|
||||
) -> None:
|
||||
Jobs.update(
|
||||
job, status=JobStatus.CREATED, status_text=f"Waiting for pre-restore backup"
|
||||
)
|
||||
failsafe_snapshot = Backups.back_up(service)
|
||||
|
||||
Jobs.update(job, status=JobStatus.RUNNING)
|
||||
Jobs.update(
|
||||
job, status=JobStatus.RUNNING, status_text=f"Restoring from {snapshot.id}"
|
||||
)
|
||||
try:
|
||||
Backups._restore_service_from_snapshot(
|
||||
service,
|
||||
|
@ -316,9 +321,19 @@ class Backups:
|
|||
verify=False,
|
||||
)
|
||||
except Exception as error:
|
||||
Jobs.update(
|
||||
job,
|
||||
status=JobStatus.ERROR,
|
||||
status_text=f"Restore failed with {str(error)}, reverting to {failsafe_snapshot.id}",
|
||||
)
|
||||
Backups._restore_service_from_snapshot(
|
||||
service, failsafe_snapshot.id, verify=False
|
||||
)
|
||||
Jobs.update(
|
||||
job,
|
||||
status=JobStatus.ERROR,
|
||||
status_text=f"Restore failed with {str(error)}, reverted to {failsafe_snapshot.id}",
|
||||
)
|
||||
raise error
|
||||
|
||||
@staticmethod
|
||||
|
@ -335,20 +350,33 @@ class Backups:
|
|||
|
||||
try:
|
||||
Backups._assert_restorable(snapshot)
|
||||
Jobs.update(
|
||||
job, status=JobStatus.RUNNING, status_text="Stopping the service"
|
||||
)
|
||||
with StoppedService(service):
|
||||
Backups.assert_dead(service)
|
||||
if strategy == RestoreStrategy.INPLACE:
|
||||
Backups._inplace_restore(service, snapshot, job)
|
||||
else: # verify_before_download is our default
|
||||
Jobs.update(job, status=JobStatus.RUNNING)
|
||||
Jobs.update(
|
||||
job,
|
||||
status=JobStatus.RUNNING,
|
||||
status_text=f"Restoring from {snapshot.id}",
|
||||
)
|
||||
Backups._restore_service_from_snapshot(
|
||||
service, snapshot.id, verify=True
|
||||
)
|
||||
|
||||
service.post_restore()
|
||||
Jobs.update(
|
||||
job,
|
||||
status=JobStatus.RUNNING,
|
||||
progress=90,
|
||||
status_text="Restarting the service",
|
||||
)
|
||||
|
||||
except Exception as error:
|
||||
Jobs.update(job, status=JobStatus.ERROR)
|
||||
Jobs.update(job, status=JobStatus.ERROR, status_text=str(error))
|
||||
raise error
|
||||
|
||||
Jobs.update(job, status=JobStatus.FINISHED)
|
||||
|
|
|
@ -13,7 +13,7 @@ from selfprivacy_api.services.owned_path import OwnedPath
|
|||
from selfprivacy_api import utils
|
||||
from selfprivacy_api.utils.waitloop import wait_until_true
|
||||
|
||||
# Upper bound, in seconds, on how long to wait for a service to finish
# starting or stopping (passed as ``timeout_sec`` to ``wait_until_true``).
DEFAULT_START_STOP_TIMEOUT = 5 * 60
|
||||
|
||||
|
||||
class ServiceStatus(Enum):
|
||||
|
@ -283,18 +283,28 @@ class StoppedService:
|
|||
|
||||
def __enter__(self) -> Service:
    """Stop the wrapped service if necessary and hand it to the ``with`` body.

    The status observed on entry is stored in ``self.original_status`` so
    that ``__exit__`` can decide whether the service must be started again.

    Returns:
        The wrapped service.

    Raises:
        TimeoutError: the service did not report ``INACTIVE`` within
            ``DEFAULT_START_STOP_TIMEOUT`` seconds after ``stop()``.
    """
    self.original_status = self.service.get_status()
    # INACTIVE needs no stop. FAILED is skipped as well -- presumably because
    # a failed service may never report INACTIVE, so waiting for it would
    # only run into the timeout (NOTE(review): confirm against Service.stop).
    if self.original_status not in [ServiceStatus.INACTIVE, ServiceStatus.FAILED]:
        try:
            self.service.stop()
            wait_until_true(
                lambda: self.service.get_status() == ServiceStatus.INACTIVE,
                timeout_sec=DEFAULT_START_STOP_TIMEOUT,
            )
        except TimeoutError as error:
            # Re-raise with a message that names the service; the original
            # exception stays attached as the cause.
            raise TimeoutError(
                f"timed out waiting for {self.service.get_display_name()} to stop"
            ) from error
    return self.service
|
||||
|
||||
def __exit__(self, type, value, traceback):
    """Start the service again if it was running when the block was entered.

    Only an original status of ``ACTIVATING`` or ``ACTIVE`` triggers a
    restart; a service that was inactive or failed on entry is left as is.
    Nothing is returned, so an exception raised inside the ``with`` body is
    not suppressed.

    Raises:
        TimeoutError: the service did not report ``ACTIVE`` within
            ``DEFAULT_START_STOP_TIMEOUT`` seconds after ``start()``.
    """
    if self.original_status in [ServiceStatus.ACTIVATING, ServiceStatus.ACTIVE]:
        try:
            self.service.start()
            wait_until_true(
                lambda: self.service.get_status() == ServiceStatus.ACTIVE,
                timeout_sec=DEFAULT_START_STOP_TIMEOUT,
            )
        except TimeoutError as error:
            # Re-raise with a message that names the service; the original
            # exception stays attached as the cause.
            raise TimeoutError(
                f"timed out waiting for {self.service.get_display_name()} to start"
            ) from error
|
||||
|
|
|
@ -135,8 +135,12 @@ class DummyService(Service):
|
|||
|
||||
@classmethod
def stop(cls):
    """Simulate stopping the dummy service.

    The status is set to ``DEACTIVATING`` immediately and scheduled to become
    ``INACTIVE`` after ``cls.startstop_delay``. A service whose status is
    ``FAILED`` is left untouched.
    """
    # simulate a failing service unable to stop
    if cls.get_status() != ServiceStatus.FAILED:
        cls.set_status(ServiceStatus.DEACTIVATING)
        cls.change_status_with_async_delay(
            ServiceStatus.INACTIVE, cls.startstop_delay
        )
|
||||
|
||||
@classmethod
|
||||
def start(cls):
|
||||
|
|
|
@ -12,6 +12,7 @@ import tempfile
|
|||
|
||||
import selfprivacy_api.services as services
|
||||
from selfprivacy_api.services import Service, get_all_services
|
||||
from selfprivacy_api.services.service import ServiceStatus
|
||||
|
||||
from selfprivacy_api.services import get_service_by_id
|
||||
from selfprivacy_api.services.test_service import DummyService
|
||||
|
@ -464,10 +465,19 @@ def restore_strategy(request) -> RestoreStrategy:
|
|||
return RestoreStrategy.INPLACE
|
||||
|
||||
|
||||
@pytest.fixture(params=["failed", "healthy"])
def failed(request) -> bool:
    """Parametrized fixture: ``True`` for the "failed" case, ``False`` for "healthy".

    Any test requesting this fixture runs once per parameter.
    """
    return request.param == "failed"
|
||||
|
||||
|
||||
def test_restore_snapshot_task(
|
||||
backups, dummy_service, restore_strategy, simulated_service_stopping_delay
|
||||
backups, dummy_service, restore_strategy, simulated_service_stopping_delay, failed
|
||||
):
|
||||
dummy_service.set_delay(simulated_service_stopping_delay)
|
||||
if failed:
|
||||
dummy_service.set_status(ServiceStatus.FAILED)
|
||||
|
||||
Backups.back_up(dummy_service)
|
||||
snaps = Backups.get_snapshots(dummy_service)
|
||||
|
|
Loading…
Reference in a new issue