Report restore progress and failures in job status text; handle FAILED services.

* backup: pass status_text on job updates during back up and restore,
  including the revert to the pre-restore (failsafe) snapshot.
* services: StoppedService skips stopping services that are already FAILED
  and re-raises stop/start timeouts with the service display name.
  DEFAULT_START_STOP_TIMEOUT changes from 10 * 60 to 5 * 60.
* test_service: DummyService.stop() leaves a FAILED service untouched.
* tests: test_restore_snapshot_task is parametrized over failed/healthy.

Review note: line structure was rebuilt from the hunk headers; blank context
lines are inferred from the hunk line counts. The index blob ids below are
kept from the original patch.

diff --git a/selfprivacy_api/backup/__init__.py b/selfprivacy_api/backup/__init__.py
index c28c01f..7b013f4 100644
--- a/selfprivacy_api/backup/__init__.py
+++ b/selfprivacy_api/backup/__init__.py
@@ -283,7 +283,7 @@ class Backups:
             Backups._store_last_snapshot(tag, snapshot)
             service.post_restore()
         except Exception as error:
-            Jobs.update(job, status=JobStatus.ERROR)
+            Jobs.update(job, status=JobStatus.ERROR, status_text=str(error))
             raise error
 
         Jobs.update(job, status=JobStatus.FINISHED)
@@ -306,9 +306,14 @@ class Backups:
         snapshot: Snapshot,
         job: Job,
     ) -> None:
+        Jobs.update(
+            job, status=JobStatus.CREATED, status_text="Waiting for pre-restore backup"
+        )
         failsafe_snapshot = Backups.back_up(service)
 
-        Jobs.update(job, status=JobStatus.RUNNING)
+        Jobs.update(
+            job, status=JobStatus.RUNNING, status_text=f"Restoring from {snapshot.id}"
+        )
         try:
             Backups._restore_service_from_snapshot(
                 service,
@@ -316,9 +321,19 @@ class Backups:
                 verify=False,
             )
         except Exception as error:
+            Jobs.update(
+                job,
+                status=JobStatus.ERROR,
+                status_text=f"Restore failed with {str(error)}, reverting to {failsafe_snapshot.id}",
+            )
             Backups._restore_service_from_snapshot(
                 service, failsafe_snapshot.id, verify=False
             )
+            Jobs.update(
+                job,
+                status=JobStatus.ERROR,
+                status_text=f"Restore failed with {str(error)}, reverted to {failsafe_snapshot.id}",
+            )
             raise error
 
     @staticmethod
@@ -335,20 +350,33 @@ class Backups:
 
         try:
             Backups._assert_restorable(snapshot)
+            Jobs.update(
+                job, status=JobStatus.RUNNING, status_text="Stopping the service"
+            )
             with StoppedService(service):
                 Backups.assert_dead(service)
                 if strategy == RestoreStrategy.INPLACE:
                     Backups._inplace_restore(service, snapshot, job)
                 else:  # verify_before_download is our default
-                    Jobs.update(job, status=JobStatus.RUNNING)
+                    Jobs.update(
+                        job,
+                        status=JobStatus.RUNNING,
+                        status_text=f"Restoring from {snapshot.id}",
+                    )
                     Backups._restore_service_from_snapshot(
                         service, snapshot.id, verify=True
                     )
 
                 service.post_restore()
+                Jobs.update(
+                    job,
+                    status=JobStatus.RUNNING,
+                    progress=90,
+                    status_text="Restarting the service",
+                )
 
         except Exception as error:
-            Jobs.update(job, status=JobStatus.ERROR)
+            Jobs.update(job, status=JobStatus.ERROR, status_text=str(error))
             raise error
 
         Jobs.update(job, status=JobStatus.FINISHED)
diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py
index 30e810f..b66bd19 100644
--- a/selfprivacy_api/services/service.py
+++ b/selfprivacy_api/services/service.py
@@ -13,7 +13,7 @@ from selfprivacy_api.services.owned_path import OwnedPath
 from selfprivacy_api import utils
 from selfprivacy_api.utils.waitloop import wait_until_true
 
-DEFAULT_START_STOP_TIMEOUT = 10 * 60
+DEFAULT_START_STOP_TIMEOUT = 5 * 60
 
 
 class ServiceStatus(Enum):
@@ -283,18 +283,28 @@ class StoppedService:
 
     def __enter__(self) -> Service:
         self.original_status = self.service.get_status()
-        if self.original_status != ServiceStatus.INACTIVE:
-            self.service.stop()
-            wait_until_true(
-                lambda: self.service.get_status() == ServiceStatus.INACTIVE,
-                timeout_sec=DEFAULT_START_STOP_TIMEOUT,
-            )
+        if self.original_status not in [ServiceStatus.INACTIVE, ServiceStatus.FAILED]:
+            try:
+                self.service.stop()
+                wait_until_true(
+                    lambda: self.service.get_status() == ServiceStatus.INACTIVE,
+                    timeout_sec=DEFAULT_START_STOP_TIMEOUT,
+                )
+            except TimeoutError as error:
+                raise TimeoutError(
+                    f"timed out waiting for {self.service.get_display_name()} to stop"
+                ) from error
         return self.service
 
     def __exit__(self, type, value, traceback):
         if self.original_status in [ServiceStatus.ACTIVATING, ServiceStatus.ACTIVE]:
-            self.service.start()
-            wait_until_true(
-                lambda: self.service.get_status() == ServiceStatus.ACTIVE,
-                timeout_sec=DEFAULT_START_STOP_TIMEOUT,
-            )
+            try:
+                self.service.start()
+                wait_until_true(
+                    lambda: self.service.get_status() == ServiceStatus.ACTIVE,
+                    timeout_sec=DEFAULT_START_STOP_TIMEOUT,
+                )
+            except TimeoutError as error:
+                raise TimeoutError(
+                    f"timed out waiting for {self.service.get_display_name()} to start"
+                ) from error
diff --git a/selfprivacy_api/services/test_service/__init__.py b/selfprivacy_api/services/test_service/__init__.py
index 967b32e..6ae33ef 100644
--- a/selfprivacy_api/services/test_service/__init__.py
+++ b/selfprivacy_api/services/test_service/__init__.py
@@ -135,8 +135,12 @@ class DummyService(Service):
 
     @classmethod
     def stop(cls):
-        cls.set_status(ServiceStatus.DEACTIVATING)
-        cls.change_status_with_async_delay(ServiceStatus.INACTIVE, cls.startstop_delay)
+        # simulate a failing service unable to stop
+        if cls.get_status() != ServiceStatus.FAILED:
+            cls.set_status(ServiceStatus.DEACTIVATING)
+            cls.change_status_with_async_delay(
+                ServiceStatus.INACTIVE, cls.startstop_delay
+            )
 
     @classmethod
     def start(cls):
diff --git a/tests/test_graphql/test_backup.py b/tests/test_graphql/test_backup.py
index 6878de1..dc491c4 100644
--- a/tests/test_graphql/test_backup.py
+++ b/tests/test_graphql/test_backup.py
@@ -12,6 +12,7 @@ import tempfile
 
 import selfprivacy_api.services as services
 from selfprivacy_api.services import Service, get_all_services
+from selfprivacy_api.services.service import ServiceStatus
 
 from selfprivacy_api.services import get_service_by_id
 from selfprivacy_api.services.test_service import DummyService
@@ -464,10 +465,19 @@ def restore_strategy(request) -> RestoreStrategy:
     return RestoreStrategy.INPLACE
 
 
+@pytest.fixture(params=["failed", "healthy"])
+def failed(request) -> bool:
+    if request.param == "failed":
+        return True
+    return False
+
+
 def test_restore_snapshot_task(
-    backups, dummy_service, restore_strategy, simulated_service_stopping_delay
+    backups, dummy_service, restore_strategy, simulated_service_stopping_delay, failed
 ):
     dummy_service.set_delay(simulated_service_stopping_delay)
+    if failed:
+        dummy_service.set_status(ServiceStatus.FAILED)
 
     Backups.back_up(dummy_service)
     snaps = Backups.get_snapshots(dummy_service)