feat: Track the status of the nixos rebuild systemd unit
continuous-integration/drone/push Build is failing Details

pull/98/head
Inex Code 2024-02-12 18:17:18 +03:00
parent d0eee319d3
commit 2019da1e10
5 changed files with 206 additions and 17 deletions

View File

@ -4,6 +4,8 @@ import subprocess
import pytz
from typing import Optional, List
from pydantic import BaseModel
from selfprivacy_api.jobs import Job, JobStatus, Jobs
from selfprivacy_api.jobs.upgrade_system import rebuild_system_task
from selfprivacy_api.utils import WriteUserData, ReadUserData
@ -87,10 +89,16 @@ def run_blocking(cmd: List[str], new_session: bool = False) -> str:
return stdout
def rebuild_system() -> int:
def rebuild_system() -> Job:
"""Rebuild the system"""
run_blocking(["systemctl", "start", "sp-nixos-rebuild.service"], new_session=True)
return 0
job = Jobs.add(
type_id="system.nixos.rebuild",
name="Rebuild system",
description="Applying the new system configuration by building the new NixOS generation.",
status=JobStatus.CREATED,
)
rebuild_system_task(job)
return job
def rollback_system() -> int:
@ -99,10 +107,16 @@ def rollback_system() -> int:
return 0
def upgrade_system() -> int:
def upgrade_system() -> Job:
"""Upgrade the system"""
run_blocking(["systemctl", "start", "sp-nixos-upgrade.service"], new_session=True)
return 0
job = Jobs.add(
type_id="system.nixos.upgrade",
name="Upgrade system",
description="Upgrading the system to the latest version.",
status=JobStatus.CREATED,
)
rebuild_system_task(job, upgrade=True)
return job
def reboot_system() -> None:

View File

@ -3,7 +3,9 @@
import typing
import strawberry
from selfprivacy_api.graphql import IsAuthenticated
from selfprivacy_api.graphql.common_types.jobs import job_to_api_job
from selfprivacy_api.graphql.mutations.mutation_interface import (
GenericJobMutationReturn,
GenericMutationReturn,
MutationReturnInterface,
)
@ -114,16 +116,17 @@ class SystemMutations:
)
@strawberry.mutation(permission_classes=[IsAuthenticated])
def run_system_rebuild(self) -> GenericMutationReturn:
def run_system_rebuild(self) -> GenericJobMutationReturn:
try:
system_actions.rebuild_system()
return GenericMutationReturn(
job = system_actions.rebuild_system()
return GenericJobMutationReturn(
success=True,
message="Starting rebuild system",
message="Starting system rebuild",
code=200,
job=job_to_api_job(job),
)
except system_actions.ShellException as e:
return GenericMutationReturn(
return GenericJobMutationReturn(
success=False,
message=str(e),
code=500,
@ -135,7 +138,7 @@ class SystemMutations:
try:
return GenericMutationReturn(
success=True,
message="Starting rebuild system",
message="Starting system rollback",
code=200,
)
except system_actions.ShellException as e:
@ -146,16 +149,17 @@ class SystemMutations:
)
@strawberry.mutation(permission_classes=[IsAuthenticated])
def run_system_upgrade(self) -> GenericMutationReturn:
system_actions.upgrade_system()
def run_system_upgrade(self) -> GenericJobMutationReturn:
try:
return GenericMutationReturn(
job = system_actions.upgrade_system()
return GenericJobMutationReturn(
success=True,
message="Starting rebuild system",
message="Starting system upgrade",
code=200,
job=job_to_api_job(job),
)
except system_actions.ShellException as e:
return GenericMutationReturn(
return GenericJobMutationReturn(
success=False,
message=str(e),
code=500,

View File

@ -0,0 +1,120 @@
"""
A task to start the system upgrade or rebuild by starting a systemd unit.
After starting, track the status of the systemd unit and update the Job
status accordingly.
"""
import subprocess
from selfprivacy_api.utils.huey import huey
from selfprivacy_api.jobs import JobStatus, Jobs, Job
import time
# How long to wait for the unit to report "active" after it was started.
_START_TIMEOUT_SECONDS = 300
# Overall budget for the rebuild, measured from the moment the unit was started.
_FINISH_TIMEOUT_SECONDS = 3600


def _systemd_unit_state(unit_name: str) -> str:
    """Return the state printed by ``systemctl is-active`` for *unit_name*.

    ``systemctl is-active`` exits with a non-zero code for every state other
    than "active", so the exit code is deliberately ignored (``check=False``)
    and only the printed state is used.
    """
    return subprocess.run(
        ["systemctl", "is-active", unit_name],
        check=False,
        capture_output=True,
        text=True,
    ).stdout.strip()


def _latest_log_line(unit_name: str) -> str:
    """Return the most recent journal line of *unit_name* (best effort)."""
    return subprocess.run(
        ["journalctl", "-u", unit_name, "-n", "1", "-o", "cat"],
        check=False,
        capture_output=True,
        text=True,
    ).stdout.strip()


@huey.task()
def rebuild_system_task(job: Job, upgrade: bool = False):
    """Start the NixOS rebuild/upgrade unit and mirror its state into *job*.

    Args:
        job: The job whose status is updated while the unit runs.
        upgrade: Start ``sp-nixos-upgrade.service`` when true, otherwise
            ``sp-nixos-rebuild.service``.

    The job ends up FINISHED once the unit becomes "inactive", or ERROR when
    the unit cannot be started, reports "failed", or a timeout expires.
    """
    unit_name = "sp-nixos-upgrade.service" if upgrade else "sp-nixos-rebuild.service"

    try:
        subprocess.run(
            ["systemctl", "start", unit_name],
            check=True,
            start_new_session=True,
            shell=False,
        )
    except subprocess.CalledProcessError as e:
        Jobs.update(
            job=job,
            status=JobStatus.ERROR,
            status_text=str(e),
            error=str(e),
        )
        return

    Jobs.update(
        job=job,
        status=JobStatus.RUNNING,
        status_text="Rebuilding the system...",
    )

    # Monotonic clock: the timeouts must not be affected by wall-clock jumps.
    start_time = time.monotonic()

    # Wait for the systemd unit to start.
    # NOTE(review): if the unit is Type=oneshot, `systemctl start` blocks until
    # it is done and the unit reports "activating" rather than "active" --
    # confirm the unit type against the NixOS configuration.
    while _systemd_unit_state(unit_name) != "active":
        # Time out after 5 minutes
        if time.monotonic() - start_time > _START_TIMEOUT_SECONDS:
            Jobs.update(
                job=job,
                status=JobStatus.ERROR,
                error="System rebuild timed out.",
            )
            return
        time.sleep(1)

    log_line = _latest_log_line(unit_name)
    Jobs.update(
        job=job,
        status=JobStatus.RUNNING,
        status_text=f"Rebuilding the system... Latest log line: {log_line}",
    )

    # Wait for the systemd unit to finish
    while True:
        state = _systemd_unit_state(unit_name)
        if state == "inactive":
            Jobs.update(
                job=job,
                status=JobStatus.FINISHED,
                result="System rebuilt.",
                progress=100,
            )
            return
        if state == "failed":
            Jobs.update(
                job=job,
                status=JobStatus.ERROR,
                error="System rebuild failed.",
            )
            return
        # Timeout of 60 minutes, counted from the start of the unit
        if time.monotonic() - start_time > _FINISH_TIMEOUT_SECONDS:
            Jobs.update(
                job=job,
                status=JobStatus.ERROR,
                error="System rebuild timed out.",
            )
            return
        time.sleep(5)

View File

@ -11,9 +11,13 @@ Adding DISABLE_ALL to that array disables the migrations module entirely.
from selfprivacy_api.utils import ReadUserData, UserDataFiles
from selfprivacy_api.migrations.write_token_to_redis import WriteTokenToRedis
from selfprivacy_api.migrations.check_for_system_rebuild_jobs import (
CheckForSystemRebuildJobs,
)
migrations = [
WriteTokenToRedis(),
CheckForSystemRebuildJobs(),
]

View File

@ -0,0 +1,47 @@
from selfprivacy_api.migrations.migration import Migration
from selfprivacy_api.jobs import JobStatus, Jobs
class CheckForSystemRebuildJobs(Migration):
    """Check if there are unfinished system rebuild jobs and finish them"""

    # Job type ids that identify a system rebuild or upgrade job.
    _REBUILD_TYPE_IDS = (
        "system.nixos.rebuild",
        "system.nixos.upgrade",
    )
    # Statuses that mean the job has not reached a final state yet.
    _UNFINISHED_STATUSES = (
        JobStatus.CREATED,
        JobStatus.RUNNING,
    )

    def get_migration_name(self):
        """Return the identifier of this migration."""
        return "check_for_system_rebuild_jobs"

    def get_migration_description(self):
        """Return a human-readable description of this migration."""
        return "Check if there are unfinished system rebuild jobs and finish them"

    def _unfinished_rebuild_jobs(self):
        """Return the rebuild/upgrade jobs still marked CREATED or RUNNING."""
        return [
            job
            for job in Jobs.get_jobs()
            if job.type_id in self._REBUILD_TYPE_IDS
            and job.status in self._UNFINISHED_STATUSES
        ]

    def is_migration_needed(self):
        """Return True when at least one unfinished rebuild job exists."""
        # Always return a real bool instead of falling through to None.
        return bool(self._unfinished_rebuild_jobs())

    def migrate(self):
        """Mark every unfinished rebuild/upgrade job as FINISHED."""
        # As the API is restarted, we assume that the jobs are finished
        for job in self._unfinished_rebuild_jobs():
            Jobs.update(
                job=job,
                status=JobStatus.FINISHED,
                result="System rebuilt.",
                progress=100,
            )