git.delta.rocks / jrsonnet / refs/commits / 741106e60111

difftreelog

feat automatic rollback

Yaroslav Bolyukin2023-10-15parent: #4340a04.patch.diff
in: trunk

10 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -610,6 +610,12 @@
 ]
 
 [[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
 name = "encode_unicode"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -684,6 +690,7 @@
  "futures",
  "hostname",
  "indicatif",
+ "itertools",
  "nixlike",
  "once_cell",
  "peg",
@@ -1127,6 +1134,15 @@
 ]
 
 [[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
+[[package]]
 name = "itoa"
 version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
modifiedCargo.tomldiffbeforeafterboth
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,2 +1,3 @@
 [workspace]
 members = ["crates/*", "cmds/*"]
+resolver = "2"
modifiedcmds/fleet/Cargo.tomldiffbeforeafterboth
--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -34,3 +34,4 @@
 futures = "0.3.17"
 tracing-indicatif = "0.3.5"
 indicatif = "0.17.7"
+itertools = "0.11.0"
modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
before · cmds/fleet/src/cmds/build_systems.rs
1use std::{env::current_dir, time::Duration};23use crate::command::MyCommand;4use crate::host::Config;5use anyhow::Result;6use clap::Parser;7use tokio::{task::LocalSet, time::sleep};8use tracing::{error, field, info, info_span, warn, Instrument};910#[derive(Parser, Clone)]11pub struct BuildSystems {12	/// Do not continue on error13	#[clap(long)]14	fail_fast: bool,15	/// Run builds as sudo16	#[clap(long)]17	privileged_build: bool,18	#[clap(subcommand)]19	subcommand: Subcommand,20}2122enum UploadAction {23	Test,24	Boot,25	Switch,26}27impl UploadAction {28	fn name(&self) -> &'static str {29		match self {30			UploadAction::Test => "test",31			UploadAction::Boot => "boot",32			UploadAction::Switch => "switch",33		}34	}3536	pub(crate) fn should_switch_profile(&self) -> bool {37		matches!(self, Self::Switch | Self::Boot)38	}39	pub(crate) fn should_activate(&self) -> bool {40		matches!(self, Self::Switch | Self::Test)41	}42}4344enum PackageAction {45	SdImage,46	InstallationCd,47}48impl PackageAction {49	fn build_attr(&self) -> String {50		match self {51			PackageAction::SdImage => "sdImage".to_owned(),52			PackageAction::InstallationCd => "installationCd".to_owned(),53		}54	}55}5657enum Action {58	Upload { action: Option<UploadAction> },59	Package(PackageAction),60}61impl Action {62	fn build_attr(&self) -> String {63		match self {64			Action::Upload { .. 
} => "toplevel".to_owned(),65			Action::Package(p) => p.build_attr(),66		}67	}68}6970impl From<Subcommand> for Action {71	fn from(s: Subcommand) -> Self {72		match s {73			Subcommand::Upload => Self::Upload { action: None },74			Subcommand::Test => Self::Upload {75				action: Some(UploadAction::Test),76			},77			Subcommand::Boot => Self::Upload {78				action: Some(UploadAction::Boot),79			},80			Subcommand::Switch => Self::Upload {81				action: Some(UploadAction::Switch),82			},83			Subcommand::SdImage => Self::Package(PackageAction::SdImage),84			Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd),85		}86	}87}8889#[derive(Parser, Clone)]90enum Subcommand {91	/// Upload, but do not switch92	Upload,93	/// Upload + switch to built system until reboot94	Test,95	/// Upload + switch to built system after reboot96	Boot,97	/// Upload + test + boot98	Switch,99100	/// Build SD .img image101	SdImage,102	/// Build an installation cd ISO image103	InstallationCd,104}105106impl BuildSystems {107	async fn build_task(self, config: Config, host: String) -> Result<()> {108		info!("building");109		let action = Action::from(self.subcommand.clone());110		let built = {111			let dir = tempfile::tempdir()?;112			dir.path().to_owned()113		};114115		let mut nix_build = MyCommand::new("nix");116		nix_build117			.args([118				"build",119				"--impure",120				"--json",121				// "--show-trace",122				"--no-link",123				"--option",124				"log-lines",125				"200",126			])127			.comparg("--out-link", &built)128			.arg(129				config.configuration_attr_name(&format!(130					"buildSystems.{}.{host}",131					action.build_attr()132				)),133			)134			.args(&config.nix_args);135136		if self.privileged_build {137			nix_build = nix_build.sudo();138		}139140		nix_build.run_nix().await.map_err(|e| {141			if action.build_attr() == "sdImage" {142				info!("sd-image build failed");143				info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For 
installer, you may want to check config)");144				info!("This module was automatically imported before, but was removed for better customization")145			}146			e147		})?;148		let built = std::fs::canonicalize(built)?;149150		match action {151			Action::Upload { action } => {152				if !config.is_local(&host) {153					info!("uploading system closure");154					let mut tries = 0;155					loop {156						let mut nix = MyCommand::new("nix");157						nix.arg("copy")158							.comparg("--to", format!("ssh://root@{host}"))159							.arg(&built);160						match nix.run_nix().await {161							Ok(()) => break,162							Err(e) if tries < 3 => {163								tries += 1;164								warn!("Copy failure ({}/3): {}", tries, e);165								sleep(Duration::from_millis(5000)).await;166							}167							Err(e) => return Err(e),168						}169					}170				}171				if let Some(action) = action {172					if action.should_switch_profile() {173						info!("switching generation");174						let mut cmd = MyCommand::new("nix-env");175						cmd.comparg("--profile", "/nix/var/nix/profiles/system")176							.comparg("--set", &built);177						config.run_on(&host, cmd, true).await?;178					}179					if action.should_activate() {180						info!("executing activation script");181						let mut switch_script = built.clone();182						switch_script.push("bin");183						switch_script.push("switch-to-configuration");184						let mut cmd = MyCommand::new(switch_script);185						cmd.arg(action.name());186						config.run_on(&host, cmd, true).await?;187					}188				}189			}190			Action::Package(PackageAction::SdImage) => {191				let mut out = current_dir()?;192				out.push(format!("sd-image-{}", host));193194				info!("building sd image to {:?}", out);195				let mut nix_build = MyCommand::new("nix");196				nix_build197					.args(["build", "--impure", "--no-link"])198					.comparg("--out-link", &out)199					.arg(config.configuration_attr_name(&format!("buildSystems.sdImage.{}", host,)))200					.args(&config.nix_args);201				if 
!self.fail_fast {202					nix_build.arg("--keep-going");203				}204				if self.privileged_build {205					nix_build = nix_build.sudo();206				}207208				nix_build.run_nix().await?;209			}210			Action::Package(PackageAction::InstallationCd) => {211				let mut out = current_dir()?;212				out.push(format!("installation-cd-{}", host));213214				info!("building sd image to {:?}", out);215				let mut nix_build = MyCommand::new("nix");216				nix_build217					.args(["build", "--impure", "--no-link"])218					.comparg("--out-link", &out)219					.arg(220						config.configuration_attr_name(&format!(221							"buildSystems.installationCd.{}",222							host,223						)),224					)225					.args(&config.nix_args);226				if !self.fail_fast {227					nix_build.arg("--keep-going");228				}229				if self.privileged_build {230					nix_build = nix_build.sudo();231				}232233				nix_build.run_nix().await?;234			}235		};236		Ok(())237	}238239	pub async fn run(self, config: &Config) -> Result<()> {240		let hosts = config.list_hosts().await?;241		let set = LocalSet::new();242		let this = &self;243		for host in hosts.iter() {244			if config.should_skip(host) {245				continue;246			}247			let config = config.clone();248			let host = host.clone();249			let this = this.clone();250			let span = info_span!("deployment", host = field::display(&host));251			set.spawn_local(252				(async move {253					match this.build_task(config, host).await {254						Ok(_) => {}255						Err(e) => {256							error!("failed to deploy host: {}", e)257						}258					}259				})260				.instrument(span),261			);262		}263		set.await;264		Ok(())265	}266}
after · cmds/fleet/src/cmds/build_systems.rs
1use std::{env::current_dir, time::Duration};23use crate::command::MyCommand;4use crate::host::Config;5use anyhow::{anyhow, Result};6use clap::Parser;7use itertools::Itertools;8use tokio::{task::LocalSet, time::sleep};9use tracing::{error, field, info, info_span, warn, Instrument};1011#[derive(Parser, Clone)]12pub struct BuildSystems {13	/// Do not continue on error14	#[clap(long)]15	fail_fast: bool,16	/// Disable automatic rollback17	#[clap(long)]18	disable_rollback: bool,19	/// Run builds as sudo20	#[clap(long)]21	privileged_build: bool,22	#[clap(subcommand)]23	subcommand: Subcommand,24}2526enum UploadAction {27	Test,28	Boot,29	Switch,30}31impl UploadAction {32	fn name(&self) -> &'static str {33		match self {34			UploadAction::Test => "test",35			UploadAction::Boot => "boot",36			UploadAction::Switch => "switch",37		}38	}3940	pub(crate) fn should_switch_profile(&self) -> bool {41		matches!(self, Self::Switch | Self::Boot)42	}43	pub(crate) fn should_activate(&self) -> bool {44		matches!(self, Self::Switch | Self::Test)45	}46	pub(crate) fn should_schedule_rollback_run(&self) -> bool {47		matches!(self, Self::Switch | Self::Test)48	}49}5051enum PackageAction {52	SdImage,53	InstallationCd,54}55impl PackageAction {56	fn build_attr(&self) -> String {57		match self {58			PackageAction::SdImage => "sdImage".to_owned(),59			PackageAction::InstallationCd => "installationCd".to_owned(),60		}61	}62}6364enum Action {65	Upload { action: Option<UploadAction> },66	Package(PackageAction),67}68impl Action {69	fn build_attr(&self) -> String {70		match self {71			Action::Upload { .. 
} => "toplevel".to_owned(),72			Action::Package(p) => p.build_attr(),73		}74	}75}7677impl From<Subcommand> for Action {78	fn from(s: Subcommand) -> Self {79		match s {80			Subcommand::Upload => Self::Upload { action: None },81			Subcommand::Test => Self::Upload {82				action: Some(UploadAction::Test),83			},84			Subcommand::Boot => Self::Upload {85				action: Some(UploadAction::Boot),86			},87			Subcommand::Switch => Self::Upload {88				action: Some(UploadAction::Switch),89			},90			Subcommand::SdImage => Self::Package(PackageAction::SdImage),91			Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd),92		}93	}94}9596#[derive(Parser, Clone)]97enum Subcommand {98	/// Upload, but do not switch99	Upload,100	/// Upload + switch to built system until reboot101	Test,102	/// Upload + switch to built system after reboot103	Boot,104	/// Upload + test + boot105	Switch,106107	/// Build SD .img image108	SdImage,109	/// Build an installation cd ISO image110	InstallationCd,111}112113struct Generation {114	id: u32,115	current: bool,116	datetime: String,117}118async fn get_current_generation(config: &Config, host: &str) -> Result<Generation> {119	let mut cmd = MyCommand::new("nix-env");120	cmd.comparg("--profile", "/nix/var/nix/profiles/system")121		.arg("--list-generations");122	// Sudo is required due to --list-generations acquiring lock on the profile.123	let data = config.run_string_on(&host, cmd, true).await?;124	let generations = data125		.split('\n')126		.map(|e| e.trim())127		.filter(|&l| l != "")128		.filter_map(|g| {129			let gen: Option<Generation> = try {130				let mut parts = g.split_whitespace();131				let id = parts.next()?;132				let id: u32 = id.parse().ok()?;133				let date = parts.next()?;134				let time = parts.next()?;135				let current = if let Some(current) = parts.next() {136					if current == "(current)" {137						Some(true)138					} else {139						None140					}141				} else {142					Some(false)143				};144				let current = current?;145	
			if parts.next().is_some() {146					warn!("unexpected text after generation: {g}");147				}148				Generation {149					id,150					current,151					datetime: format!("{date} {time}"),152				}153			};154			if gen.is_none() {155				warn!("bad generation: {g}")156			}157			gen158		})159		.collect::<Vec<_>>();160	let current = generations161		.into_iter()162		.filter(|g| g.current)163		.at_most_one()164		.map_err(|_e| anyhow!("bad list-generations output"))?165		.ok_or_else(|| anyhow!("failed to find generation"))?;166	Ok(current)167}168169impl BuildSystems {170	async fn build_task(self, config: Config, host: String) -> Result<()> {171		info!("building");172		let action = Action::from(self.subcommand.clone());173		let built = {174			let dir = tempfile::tempdir()?;175			dir.path().to_owned()176		};177178		let mut nix_build = MyCommand::new("nix");179		nix_build180			.args([181				"build",182				"--impure",183				"--json",184				// "--show-trace",185				"--no-link",186				"--option",187				"log-lines",188				"200",189			])190			.comparg("--out-link", &built)191			.arg(192				config.configuration_attr_name(&format!(193					"buildSystems.{}.{host}",194					action.build_attr()195				)),196			)197			.args(&config.nix_args);198199		if self.privileged_build {200			nix_build = nix_build.sudo();201		}202203		nix_build.run_nix().await.map_err(|e| {204			if action.build_attr() == "sdImage" {205				info!("sd-image build failed");206				info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");207				info!("This module was automatically imported before, but was removed for better customization")208			}209			e210		})?;211		let built = std::fs::canonicalize(built)?;212213		match action {214			Action::Upload { action } => {215				if !config.is_local(&host) {216					info!("uploading system closure");217					let mut tries = 0;218					loop {219						let mut nix = MyCommand::new("nix");220						
nix.arg("copy")221							.arg("--substitute-on-destination")222							.comparg("--to", format!("ssh://root@{host}"))223							.arg(&built);224						match nix.run_nix().await {225							Ok(()) => break,226							Err(e) if tries < 3 => {227								tries += 1;228								warn!("Copy failure ({}/3): {}", tries, e);229								sleep(Duration::from_millis(5000)).await;230							}231							Err(e) => return Err(e),232						}233					}234				}235				if let Some(action) = action {236					let mut failed = false;237					// TODO: Lockfile, to prevent concurrent system switch?238					// TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback239					// is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to240					// unit name conflict in systemd-run241					if !self.disable_rollback {242						let _span = info_span!("preparing").entered();243						info!("preparing for rollback");244						let generation = get_current_generation(&config, &host).await?;245						info!(246							"rollback target would be {} {}",247							generation.id, generation.datetime248						);249						{250							let mut cmd = MyCommand::new("sh");251							cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));252							if let Err(e) = config.run_on(&host, cmd, true).await {253								error!("failed to set rollback marker: {e}");254								failed = true;255							}256						}257						// Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.258						// Kicking it on manually will work best.259						//260						// There wouldn't be conflict, because here we trigger start of the primary service, and systemd will261						// only allow one instance of it.262						if action.should_schedule_rollback_run() {263							let mut cmd = MyCommand::new("systemd-run");264							
cmd.comparg("--on-active", "3min")265								.comparg("--unit", "rollback-watchdog-run")266								.arg("systemctl")267								.arg("start")268								.arg("rollback-watchdog.service");269							if let Err(e) = config.run_on(&host, cmd, true).await {270								error!("failed to schedule rollback run: {e}");271								failed = true;272							}273						}274					}275					if action.should_switch_profile() && !failed {276						info!("switching generation");277						let mut cmd = MyCommand::new("nix-env");278						cmd.comparg("--profile", "/nix/var/nix/profiles/system")279							.comparg("--set", &built);280						if let Err(e) = config.run_on(&host, cmd, true).await {281							error!("failed to switch generation: {e}");282							failed = true;283						}284					}285					if action.should_activate() && !failed {286						let _span = info_span!("activating").entered();287						info!("executing activation script");288						let mut switch_script = built.clone();289						switch_script.push("bin");290						switch_script.push("switch-to-configuration");291						let mut cmd = MyCommand::new(switch_script);292						cmd.arg(action.name());293						if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {294							error!("failed to activate: {e}");295							failed = true;296						}297					}298					if !self.disable_rollback {299						{300							let _span = info_span!("rollback").entered();301							if failed {302								info!("executing rollback");303								let mut cmd = MyCommand::new("systemctl");304								cmd.arg("start").arg("rollback-watchdog.service");305								if let Err(e) = config.run_on(&host, cmd, true).await {306									error!("failed to rollback: {e}");307								}308							} else {309								info!("marking upgrade as successful");310								let mut cmd = MyCommand::new("rm");311								cmd.arg("-f").arg("/etc/fleet_rollback_marker");312								if let Err(e) =313									config.run_on(&host, cmd, true).in_current_span().await314								{315									
error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")316								}317							}318						}319						{320							let _span = info_span!("disarm").entered();321							info!("disarming watchdog, just in case");322							{323								let mut cmd = MyCommand::new("systemctl");324								cmd.arg("stop").arg("rollback-watchdog.timer");325								if let Err(_e) = config.run_on(&host, cmd, true).await {326									// It is ok, if there was no reboot.327								}328							}329							if action.should_schedule_rollback_run() {330								let mut cmd = MyCommand::new("systemctl");331								cmd.arg("stop").arg("rollback-watchdog-run.timer");332								if let Err(e) = config.run_on(&host, cmd, true).await {333									error!("failed to disarm rollback run: {e}");334								}335							}336						}337					}338				}339			}340			Action::Package(PackageAction::SdImage) => {341				let mut out = current_dir()?;342				out.push(format!("sd-image-{}", host));343344				info!("building sd image to {:?}", out);345				let mut nix_build = MyCommand::new("nix");346				nix_build347					.args(["build", "--impure", "--no-link"])348					.comparg("--out-link", &out)349					.arg(config.configuration_attr_name(&format!("buildSystems.sdImage.{}", host,)))350					.args(&config.nix_args);351				if !self.fail_fast {352					nix_build.arg("--keep-going");353				}354				if self.privileged_build {355					nix_build = nix_build.sudo();356				}357358				nix_build.run_nix().await?;359			}360			Action::Package(PackageAction::InstallationCd) => {361				let mut out = current_dir()?;362				out.push(format!("installation-cd-{}", host));363364				info!("building sd image to {:?}", out);365				let mut nix_build = MyCommand::new("nix");366				nix_build367					.args(["build", "--impure", "--no-link"])368					.comparg("--out-link", &out)369					.arg(370						config.configuration_attr_name(&format!(371							"buildSystems.installationCd.{}",372							host,373						)),374					)375	
				.args(&config.nix_args);376				if !self.fail_fast {377					nix_build.arg("--keep-going");378				}379				if self.privileged_build {380					nix_build = nix_build.sudo();381				}382383				nix_build.run_nix().await?;384			}385		};386		Ok(())387	}388389	pub async fn run(self, config: &Config) -> Result<()> {390		let hosts = config.list_hosts().await?;391		let set = LocalSet::new();392		let this = &self;393		for host in hosts.iter() {394			if config.should_skip(host) {395				continue;396			}397			let config = config.clone();398			let host = host.clone();399			let this = this.clone();400			let span = info_span!("deployment", host = field::display(&host));401			set.spawn_local(402				(async move {403					match this.build_task(config, host).await {404						Ok(_) => {}405						Err(e) => {406							error!("failed to deploy host: {}", e)407						}408					}409				})410				.instrument(span),411			);412		}413		set.await;414		Ok(())415	}416}
modifiedcmds/fleet/src/command.rsdiffbeforeafterboth
--- a/cmds/fleet/src/command.rs
+++ b/cmds/fleet/src/command.rs
@@ -143,12 +143,14 @@
 
 	pub async fn run_nix_string(self) -> Result<String> {
 		let str = self.clone().into_string();
-		let cmd = self.into_command();
+		let mut cmd = self.into_command();
+		cmd.arg("--log-format").arg("internal-json");
 		run_nix_inner_stdout(str, cmd, &mut NixHandler::default()).await
 	}
 	pub async fn run_nix(self) -> Result<()> {
 		let str = self.clone().into_string();
 		let mut cmd = self.into_command();
+		cmd.arg("--log-format").arg("internal-json");
 		cmd.stdout(Stdio::inherit());
 		run_nix_inner(str, cmd, &mut NixHandler::default()).await
 	}
@@ -410,7 +412,6 @@
 	handler: &mut dyn Handler,
 ) -> Result<Option<String>> {
 	info!("running {str}");
-	cmd.arg("--log-format").arg("internal-json");
 	cmd.stderr(Stdio::piped());
 	cmd.stdout(Stdio::piped());
 	let mut child = cmd.spawn()?;
modifiedcmds/fleet/src/main.rsdiffbeforeafterboth
--- a/cmds/fleet/src/main.rs
+++ b/cmds/fleet/src/main.rs
@@ -1,3 +1,5 @@
+#![feature(try_blocks)]
+
 pub mod cmds;
 pub mod command;
 pub mod host;
@@ -6,16 +8,14 @@
 mod fleetdata;
 
 use std::ffi::OsString;
-use std::io;
 use std::time::Duration;
 
-use anyhow::{anyhow, bail, Result};
+use anyhow::{bail, Result};
 use clap::Parser;
 
 use cmds::{build_systems::BuildSystems, info::Info, secrets::Secrets};
 use host::{Config, FleetOpts};
 use indicatif::{ProgressState, ProgressStyle};
-use tokio::fs;
 use tokio::process::Command;
 use tracing::{info, metadata::LevelFilter};
 use tracing_indicatif::IndicatifLayer;
@@ -79,9 +79,6 @@
 		Opts::Prefetch(p) => p.run(config).await?,
 	};
 	Ok(())
-}
-fn elapsed_subsec(state: &ProgressState, writer: &mut dyn std::fmt::Write) {
-	let _ = writer.write_str(&format!("{:?}", state.elapsed()));
 }
 
 #[tokio::main]
modifiedcmds/install-secrets/Cargo.tomldiffbeforeafterboth
--- a/cmds/install-secrets/Cargo.toml
+++ b/cmds/install-secrets/Cargo.toml
@@ -9,7 +9,7 @@
 env_logger = "0.10.0"
 log = "0.4.14"
 nix = "0.26.1"
-serde = "1.0.130"
+serde = { version = "1.0.130", features = ["derive"] }
 serde_json = "1.0.89"
 clap = { version = "4.0.29", features = [
 	"derive",
modifiednixos/modules/module-list.nixdiffbeforeafterboth
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -2,4 +2,5 @@
   ../fleetPkgs.nix
   ../meta.nix
   ../secrets.nix
+  ../rollback.nix
 ]
addednixos/rollback.nixdiffbeforeafterboth
--- /dev/null
+++ b/nixos/rollback.nix
@@ -0,0 +1,52 @@
+{config, ...}: {
+  # Automatic rollback support: a watchdog service that restores the generation recorded in
+  # /etc/fleet_rollback_marker, and a timer that triggers it while that marker exists.
+  #
+  # TODO: Make it work with systemd-initrd approach.
+  # In this case we can't just switch generation and re-run activation script, since the root filesystem might not be
+  # mounted yet. We need to explicitly remove the last generation, and this needs deeper integration with systemd/grub/
+  # whatever user uses. boot.json also might help here.
+
+  # If the marker file is still present when this service runs, the system profile is rolled back to the generation
+  # id stored in it, that generation is activated, and the marker is removed.
+  systemd.services.rollback-watchdog = {
+    description = "Rollback watchdog";
+    script = ''
+      set -eu
+      if [ -f /etc/fleet_rollback_marker ]; then
+        echo "found the rollback marker, switching to older generation"
+        target=$(cat /etc/fleet_rollback_marker)
+        echo "rolling back profile"
+        nix profile rollback --profile /nix/var/nix/profiles/system --to "$target"
+        echo "executing activation script"
+        "/nix/var/nix/profiles/system-$target-link/bin/switch-to-configuration" switch
+        echo "removing rollback marker"
+        rm -f /etc/fleet_rollback_marker
+      else
+        echo "rollback marker was removed, upgrade is succeeded"
+      fi
+    '';
+    path = [
+      # Should have nix-command support
+      config.nix.package
+    ];
+    serviceConfig.Type = "exec";
+    unitConfig = {
+      X-StopOnRemoval = false;
+    };
+  };
+
+  systemd.timers.rollback-watchdog = {
+    description = "Timer for rollback watchdog";
+    wantedBy = ["timers.target"];
+    timerConfig = {
+      # Relative to the start of the timer itself. OnUnitActiveSec alone is relative to the last activation of
+      # rollback-watchdog.service, so it would not elapse while that service has not been started yet.
+      OnActiveSec = "3min";
+      RemainAfterElapse = false;
+    };
+    unitConfig = {
+      # The timer is only started while the rollback marker exists.
+      ConditionPathExists = "/etc/fleet_rollback_marker";
+    };
+  };
+}
modifiedpkgs/fleet-install-secrets.nixdiffbeforeafterboth
--- a/pkgs/fleet-install-secrets.nix
+++ b/pkgs/fleet-install-secrets.nix
@@ -6,7 +6,7 @@
   name = "${pname}-${version}";
 
   src = ../.;
-  cargoBuildFlags = "-p ${pname}";
+  buildAndTestSubdir = "cmds/install-secrets";
   cargoLock = {
     lockFile = ../Cargo.lock;
     outputHashes = {