From 0a852d8b509cd95edaad498a85efa4f7191795db Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 13:39:12 +0000 Subject: [PATCH 1/7] fix(backups): consider failing services MORE and not try to stop them --- selfprivacy_api/services/service.py | 2 +- selfprivacy_api/services/test_service/__init__.py | 8 ++++++-- tests/test_graphql/test_backup.py | 12 +++++++++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py index 30e810f..e6f51d3 100644 --- a/selfprivacy_api/services/service.py +++ b/selfprivacy_api/services/service.py @@ -283,7 +283,7 @@ class StoppedService: def __enter__(self) -> Service: self.original_status = self.service.get_status() - if self.original_status != ServiceStatus.INACTIVE: + if self.original_status not in [ServiceStatus.INACTIVE, ServiceStatus.FAILED]: self.service.stop() wait_until_true( lambda: self.service.get_status() == ServiceStatus.INACTIVE, diff --git a/selfprivacy_api/services/test_service/__init__.py b/selfprivacy_api/services/test_service/__init__.py index 967b32e..6ae33ef 100644 --- a/selfprivacy_api/services/test_service/__init__.py +++ b/selfprivacy_api/services/test_service/__init__.py @@ -135,8 +135,12 @@ class DummyService(Service): @classmethod def stop(cls): - cls.set_status(ServiceStatus.DEACTIVATING) - cls.change_status_with_async_delay(ServiceStatus.INACTIVE, cls.startstop_delay) + # simulate a failing service unable to stop + if not cls.get_status() == ServiceStatus.FAILED: + cls.set_status(ServiceStatus.DEACTIVATING) + cls.change_status_with_async_delay( + ServiceStatus.INACTIVE, cls.startstop_delay + ) @classmethod def start(cls): diff --git a/tests/test_graphql/test_backup.py b/tests/test_graphql/test_backup.py index 1990ef7..f66398d 100644 --- a/tests/test_graphql/test_backup.py +++ b/tests/test_graphql/test_backup.py @@ -10,6 +10,7 @@ from subprocess import Popen import selfprivacy_api.services as services from selfprivacy_api.services import Service, get_all_services +from selfprivacy_api.services.service import ServiceStatus from selfprivacy_api.services import get_service_by_id from selfprivacy_api.services.test_service import DummyService @@ -462,10 +463,19 @@ def restore_strategy(request) -> RestoreStrategy: return RestoreStrategy.INPLACE +@pytest.fixture(params=["failed", "healthy"]) +def failed(request) -> bool: + if request.param == "failed": + return True + return False + + def test_restore_snapshot_task( - backups, dummy_service, restore_strategy, simulated_service_stopping_delay + backups, dummy_service, restore_strategy, simulated_service_stopping_delay, failed ): dummy_service.set_delay(simulated_service_stopping_delay) + if failed: + dummy_service.set_status(ServiceStatus.FAILED) Backups.back_up(dummy_service) snaps = Backups.get_snapshots(dummy_service) From 72535f86558ff90e7ad9ea964a282ec9a0044099 Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 13:40:04 +0000 Subject: [PATCH 2/7] fix(backups): default timeout to 5 min for service starting and stopping in backup operations --- selfprivacy_api/services/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py index e6f51d3..e01501b 100644 --- a/selfprivacy_api/services/service.py +++ b/selfprivacy_api/services/service.py @@ -13,7 +13,7 @@ from selfprivacy_api.services.owned_path import OwnedPath from selfprivacy_api import utils from selfprivacy_api.utils.waitloop import wait_until_true -DEFAULT_START_STOP_TIMEOUT = 10 * 60 +DEFAULT_START_STOP_TIMEOUT = 5 * 60 class ServiceStatus(Enum): From de52dffddadab70dc09cf5fcd853324f215c9ad7 Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 13:55:23 +0000 Subject: [PATCH 3/7] refactor(backups): a better backup-related service timeout error --- selfprivacy_api/services/service.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py index e01501b..0f018b6 100644 --- a/selfprivacy_api/services/service.py +++ b/selfprivacy_api/services/service.py @@ -284,17 +284,23 @@ class StoppedService: def __enter__(self) -> Service: self.original_status = self.service.get_status() if self.original_status not in [ServiceStatus.INACTIVE, ServiceStatus.FAILED]: - self.service.stop() - wait_until_true( - lambda: self.service.get_status() == ServiceStatus.INACTIVE, - timeout_sec=DEFAULT_START_STOP_TIMEOUT, - ) + try: + self.service.stop() + wait_until_true( + lambda: self.service.get_status() == ServiceStatus.INACTIVE, + timeout_sec=DEFAULT_START_STOP_TIMEOUT, + ) + except TimeoutError as e: + raise TimeoutError(f"timed out waiting for {self.service.get_display_name()} to stop") return self.service def __exit__(self, type, value, traceback): if self.original_status in [ServiceStatus.ACTIVATING, ServiceStatus.ACTIVE]: - self.service.start() - wait_until_true( - lambda: self.service.get_status() == ServiceStatus.ACTIVE, - timeout_sec=DEFAULT_START_STOP_TIMEOUT, - ) + try: + self.service.start() + wait_until_true( + lambda: self.service.get_status() == ServiceStatus.ACTIVE, + timeout_sec=DEFAULT_START_STOP_TIMEOUT, + ) + except TimeoutError as e: + raise TimeoutError(f"timed out waiting for {self.service.get_display_name()} to start") From 02b03cf401c88e348cb6c2c9582b4d1f9556bf76 Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 14:02:07 +0000 Subject: [PATCH 4/7] feature(backups): report the error text in a job --- selfprivacy_api/backup/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selfprivacy_api/backup/__init__.py b/selfprivacy_api/backup/__init__.py index c28c01f..167ff33 100644 --- a/selfprivacy_api/backup/__init__.py +++ b/selfprivacy_api/backup/__init__.py @@ -283,7 +283,7 @@ class Backups: Backups._store_last_snapshot(tag, snapshot) service.post_restore() except Exception as error: - Jobs.update(job, status=JobStatus.ERROR) + Jobs.update(job, status=JobStatus.ERROR, status_text=str(error)) raise error Jobs.update(job, status=JobStatus.FINISHED) @@ -348,7 +348,7 @@ class Backups: service.post_restore() except Exception as error: - Jobs.update(job, status=JobStatus.ERROR) + Jobs.update(job, status=JobStatus.ERROR, status_text=str(error)) raise error Jobs.update(job, status=JobStatus.FINISHED) From c68239044f8c5983a68ae0606ebb9f5ccd07d56c Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 14:18:33 +0000 Subject: [PATCH 5/7] feature(backups): report status text for restore jobs --- selfprivacy_api/backup/__init__.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/selfprivacy_api/backup/__init__.py b/selfprivacy_api/backup/__init__.py index 167ff33..0f445a8 100644 --- a/selfprivacy_api/backup/__init__.py +++ b/selfprivacy_api/backup/__init__.py @@ -308,7 +308,9 @@ class Backups: ) -> None: failsafe_snapshot = Backups.back_up(service) - Jobs.update(job, status=JobStatus.RUNNING) + Jobs.update( + job, status=JobStatus.RUNNING, status_text=f"Restoring from {snapshot.id}" + ) try: Backups._restore_service_from_snapshot( service, @@ -316,9 +318,19 @@ class Backups: verify=False, ) except Exception as error: + Jobs.update( + job, + status=JobStatus.ERROR, + status_text=f" restore failed with {str(error)}, reverting to {failsafe_snapshot.id}", + ) Backups._restore_service_from_snapshot( service, failsafe_snapshot.id, verify=False ) + Jobs.update( + job, + status=JobStatus.ERROR, + status_text=f" restore failed with {str(error)}, reverted to {failsafe_snapshot.id}", + ) raise error @staticmethod @@ -335,17 +347,30 @@ class Backups: try: Backups._assert_restorable(snapshot) + Jobs.update( + job, status=JobStatus.CREATED, status_text="stopping the service" + ) with StoppedService(service): Backups.assert_dead(service) if strategy == RestoreStrategy.INPLACE: Backups._inplace_restore(service, snapshot, job) else: # verify_before_download is our default - Jobs.update(job, status=JobStatus.RUNNING) + Jobs.update( + job, + status=JobStatus.RUNNING, + status_text=f"Restoring from {snapshot.id}", + ) Backups._restore_service_from_snapshot( service, snapshot.id, verify=True ) service.post_restore() + Jobs.update( + job, + status=JobStatus.RUNNING, + progress=90, + status_text="restarting the service", + ) except Exception as error: Jobs.update(job, status=JobStatus.ERROR, status_text=str(error)) From 1333aad57dac56dea8f0d2b990bc0a66467c9a4b Mon Sep 17 00:00:00 2001 From: Houkime <> Date: Wed, 23 Aug 2023 14:35:03 +0000 Subject: [PATCH 6/7] feature(backups): temporarily revert restore job status to created for inplace restore to run backup --- selfprivacy_api/backup/__init__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/selfprivacy_api/backup/__init__.py b/selfprivacy_api/backup/__init__.py index 0f445a8..7b013f4 100644 --- a/selfprivacy_api/backup/__init__.py +++ b/selfprivacy_api/backup/__init__.py @@ -306,6 +306,9 @@ class Backups: snapshot: Snapshot, job: Job, ) -> None: + Jobs.update( + job, status=JobStatus.CREATED, status_text=f"Waiting for pre-restore backup" + ) failsafe_snapshot = Backups.back_up(service) Jobs.update( @@ -321,7 +324,7 @@ class Backups: Jobs.update( job, status=JobStatus.ERROR, - status_text=f" restore failed with {str(error)}, reverting to {failsafe_snapshot.id}", + status_text=f"Restore failed with {str(error)}, reverting to {failsafe_snapshot.id}", ) Backups._restore_service_from_snapshot( service, failsafe_snapshot.id, verify=False @@ -329,7 +332,7 @@ class Backups: Jobs.update( job, status=JobStatus.ERROR, - status_text=f" restore failed with {str(error)}, reverted to {failsafe_snapshot.id}", + status_text=f"Restore failed with {str(error)}, reverted to {failsafe_snapshot.id}", ) raise error @@ -348,7 +351,7 @@ class Backups: try: Backups._assert_restorable(snapshot) Jobs.update( - job, status=JobStatus.CREATED, status_text="stopping the service" + job, status=JobStatus.RUNNING, status_text="Stopping the service" ) with StoppedService(service): Backups.assert_dead(service) @@ -369,7 +372,7 @@ class Backups: job, status=JobStatus.RUNNING, progress=90, - status_text="restarting the service", + status_text="Restarting the service", ) except Exception as error: From 9db717c774ff2ddb22d81aad6930caebf53c4c83 Mon Sep 17 00:00:00 2001 From: Inex Code Date: Fri, 25 Aug 2023 19:28:37 +0300 Subject: [PATCH 7/7] style: linting --- selfprivacy_api/services/service.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/selfprivacy_api/services/service.py b/selfprivacy_api/services/service.py index 0f018b6..b66bd19 100644 --- a/selfprivacy_api/services/service.py +++ b/selfprivacy_api/services/service.py @@ -290,8 +290,10 @@ class StoppedService: lambda: self.service.get_status() == ServiceStatus.INACTIVE, timeout_sec=DEFAULT_START_STOP_TIMEOUT, ) - except TimeoutError as e: - raise TimeoutError(f"timed out waiting for {self.service.get_display_name()} to stop") + except TimeoutError as error: + raise TimeoutError( + f"timed out waiting for {self.service.get_display_name()} to stop" + ) from error return self.service def __exit__(self, type, value, traceback): @@ -302,5 +304,7 @@ class StoppedService: lambda: self.service.get_status() == ServiceStatus.ACTIVE, timeout_sec=DEFAULT_START_STOP_TIMEOUT, ) - except TimeoutError as e: - raise TimeoutError(f"timed out waiting for {self.service.get_display_name()} to start") + except TimeoutError as error: + raise TimeoutError( + f"timed out waiting for {self.service.get_display_name()} to start" + ) from error