@staticmethod
def add_snap_but_with_quotas(
    new_snap: Snapshot, snaps: List[Snapshot], quotas: AutobackupQuotas
) -> None:
    """
    Append new_snap to snaps and evict the oldest snapshots over quota.

    snaps is modified in place. Each period (day, week, month, year, and
    the all-time total) is checked in that order against its quota; a
    quota <= 0 is skipped, i.e. treated as unlimited. Only snapshots that
    fall into the same period as new_snap count towards a quota.
    """
    # Quota values are read up front, before the list is touched.
    period_limits = (
        (same_day, quotas.daily),
        (same_week, quotas.weekly),
        (same_month, quotas.monthly),
        (same_year, quotas.yearly),
        (same_lifetime_of_the_universe, quotas.total),
    )

    snaps.append(new_snap)

    for in_same_period, limit in period_limits:
        if limit > 0:
            # Oldest first, so the slice below picks the ones to drop.
            peers = sorted(
                (
                    candidate
                    for candidate in snaps
                    if in_same_period(candidate.created_at, new_snap.created_at)
                ),
                key=lambda s: s.created_at,
            )
            excess = len(peers) - limit
            for doomed in peers[: max(excess, 0)]:
                snaps.remove(doomed)


@staticmethod
def _prune_snaps_with_quotas(snapshots: List[Snapshot]) -> List[Snapshot]:
    """
    Return the snapshots that survive the currently stored quotas.

    Kept separate from the repository-touching code so it can be tested
    on plain lists. The input list itself is left untouched: snapshots
    are replayed oldest-first into a fresh list.
    """
    chronological = sorted(snapshots, key=lambda s: s.created_at)
    limits = Backups.autobackup_quotas()

    survivors: List[Snapshot] = []
    for candidate in chronological:
        Backups.add_snap_but_with_quotas(candidate, survivors, limits)

    return survivors
@staticmethod
def _prune_auto_snaps(service) -> None:
    """
    Forget the automatic snapshots of service that exceed the quotas.

    Not very testable by itself; the selection logic lives in
    Backups._prune_snaps_with_quotas and is tested there.
    """
    existing = Backups._auto_snaps(service)
    survivors = Backups._prune_snaps_with_quotas(existing)

    # TODO: Can be optimized since restic can forget an array in one op,
    # but most of the time this will be only one snap to forget.
    for candidate in existing:
        if candidate in survivors:
            continue
        Backups.forget_snapshot(candidate)


@staticmethod
def _standardize_quotas(i: int) -> int:
    """Map every non-positive quota to -1; positive values pass through."""
    return i if i > 0 else -1


@staticmethod
def autobackup_quotas() -> AutobackupQuotas:
    """everything <=0 means unlimited"""
    stored_quotas = Storage.autobackup_quotas()
    return stored_quotas


@staticmethod
def set_autobackup_quotas(quotas: AutobackupQuotas) -> None:
    """everything <=0 means unlimited"""
    normalize = Backups._standardize_quotas

    # Non-positive values are stored uniformly as -1.
    normalized = AutobackupQuotas(
        daily=normalize(quotas.daily),
        weekly=normalize(quotas.weekly),
        monthly=normalize(quotas.monthly),
        yearly=normalize(quotas.yearly),
        total=normalize(quotas.total),
    )
    Storage.set_autobackup_quotas(normalized)
@staticmethod
def set_autobackup_quotas(quotas: AutobackupQuotas) -> None:
    """Store the quotas under REDIS_AUTOBACKUP_QUOTAS_KEY as a pydantic model."""
    quotas_model = quotas.to_pydantic()
    store_model_as_hash(redis, REDIS_AUTOBACKUP_QUOTAS_KEY, quotas_model)


@staticmethod
def autobackup_quotas() -> AutobackupQuotas:
    """
    Load the stored quotas.

    When hash_as_model finds nothing under the key, every quota is
    reported as -1 (unlimited).
    """
    stored = hash_as_model(redis, REDIS_AUTOBACKUP_QUOTAS_KEY, _AutobackupQuotas)
    if stored is not None:
        return AutobackupQuotas.from_pydantic(stored)

    # Nothing saved yet: fall back to "no limits at all".
    return AutobackupQuotas(
        daily=-1,
        weekly=-1,
        monthly=-1,
        yearly=-1,
        total=-1,
    )
# GraphQL-facing counterpart of the _AutobackupQuotas pydantic model.
# The class body is intentionally empty: all_fields=True asks strawberry to
# derive the fields (daily, weekly, monthly, yearly, total) from the model.
# Other modules in this change convert between the two with
# to_pydantic() / from_pydantic().
@strawberry.experimental.pydantic.type(model=_AutobackupQuotas, all_fields=True)
class AutobackupQuotas:
    pass
def test_set_quotas(backups):
    """Non-positive quotas must come back normalized to -1 after a round trip."""
    requested = AutobackupQuotas(
        daily=2343,
        weekly=343,
        monthly=0,
        yearly=-34556,
        total=563,
    )
    Backups.set_autobackup_quotas(requested)

    stored = Backups.autobackup_quotas()
    expected = AutobackupQuotas(
        daily=2343,
        weekly=343,
        monthly=-1,
        yearly=-1,
        total=563,
    )
    assert stored == expected


def dummy_snapshot(date: datetime):
    """Build a throwaway Snapshot whose id is derived from its creation time."""
    snapshot_id = str(hash(date))
    return Snapshot(
        id=snapshot_id,
        service_name="someservice",
        created_at=date,
        reason=BackupReason.EXPLICIT,
    )
def test_autobackup_snapshots_pruning_yearly(backups):
    """With a yearly quota of 2, only the two newest snapshots of a year survive."""
    feb, mar, apr = (datetime(year=2023, month=month, day=1) for month in (2, 3, 4))
    # Alone in its year, so the quota cannot affect it.
    far_future = datetime(year=2055, month=3, day=1)

    history = [dummy_snapshot(moment) for moment in (feb, mar, apr, far_future)]

    yearly_only = copy(unlimited_quotas)
    yearly_only.yearly = 2
    Backups.set_autobackup_quotas(yearly_only)

    survivors = Backups._prune_snaps_with_quotas(history)
    assert survivors == [
        dummy_snapshot(moment) for moment in (mar, apr, far_future)
    ]
def test_autobackup_snapshots_pruning_edgeweek(backups):
    """A week that straddles New Year must still be counted as one week."""
    # jan 1 2023 is Sunday; the test expects Dec 30, Dec 31 and Jan 1 to
    # share one weekly quota, so the oldest of the three gets pruned.
    dec_30 = datetime(year=2022, month=12, day=30)
    dec_31 = datetime(year=2022, month=12, day=31)
    jan_1 = datetime(year=2023, month=1, day=1)
    jan_6 = datetime(year=2023, month=1, day=6)

    history = [dummy_snapshot(moment) for moment in (dec_30, dec_31, jan_1, jan_6)]

    weekly_only = copy(unlimited_quotas)
    weekly_only.weekly = 2
    Backups.set_autobackup_quotas(weekly_only)

    survivors = Backups._prune_snaps_with_quotas(history)
    assert survivors == [
        dummy_snapshot(moment) for moment in (dec_31, jan_1, jan_6)
    ]
snaps +# assert snap not in snaps + +# Backups.set_max_auto_snapshots(-1) +# snap4 = Backups.back_up(dummy_service, BackupReason.AUTO) +# snaps = Backups.get_snapshots(dummy_service) +# assert len(snaps) == 3 +# assert snap4 in snaps def folder_files(folder):