git.delta.rocks / jrsonnet / refs/commits / 3627c6c6df00

difftreelog

feat nixos-install target

Lach2025-04-06parent: #3972fee.patch.diff
in: trunk

6 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -924,7 +924,6 @@
  "hostname",
  "human-repr",
  "indicatif",
- "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
@@ -958,6 +957,7 @@
  "fleet-shared",
  "futures",
  "hostname",
+ "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
modifiedcmds/fleet/Cargo.tomldiffbeforeafterboth
--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -47,7 +47,6 @@
 nix-eval.workspace = true
 nom = "7.1.3"
 fleet-base = { version = "0.1.0", path = "../../crates/fleet-base" }
-indoc = "2.0.6"
 
 [features]
 default = ["indicatif"]
modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
--- a/cmds/fleet/src/cmds/build_systems.rs
+++ b/cmds/fleet/src/cmds/build_systems.rs
@@ -1,9 +1,9 @@
-use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, str::FromStr, time::Duration};
+use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};
 
 use anyhow::{anyhow, bail, Result};
 use clap::{Parser, ValueEnum};
 use fleet_base::{
-	host::{Config, ConfigHost},
+	host::{Config, ConfigHost, DeployKind},
 	opts::FleetOpts,
 };
 use itertools::Itertools as _;
@@ -131,6 +131,13 @@
 	specialisation: Option<String>,
 	disable_rollback: bool,
 ) -> Result<()> {
+	let deploy_kind = host.deploy_kind().await?;
+	if deploy_kind == DeployKind::NixosInstall
+		&& !matches!(action, DeployAction::Boot | DeployAction::Upload)
+	{
+		bail!("nixos-install deploy kind only supports boot and upload actions");
+	}
+
 	let mut failed = false;
 
 	// TODO: Lockfile, to prevent concurrent system switch?
@@ -177,39 +184,74 @@
 			}
 		}
 	}
-
-	if action.should_switch_profile() && !failed {
-		info!("switching system profile generation");
-		// It would also be possible to update profile atomically during copy:
-		// https://github.com/NixOS/nix/pull/11657
-		let mut cmd = host.cmd("nix").await?;
-		cmd.arg("build");
-		cmd.comparg("--profile", "/nix/var/nix/profiles/system");
-		cmd.arg(&built);
-		if let Err(e) = cmd.sudo().run_nix().await {
-			error!("failed to switch system profile generation: {e}");
+	if deploy_kind == DeployKind::NixosInstall {
+		info!(
+			"running nixos-install to switch profile, install bootloader, and perform activation"
+		);
+		let mut cmd = host.cmd("nixos-install").await?;
+		cmd.arg("--system").arg(&built).args([
+			// Channels here aren't fleet host system channels, but channels embedded in installation cd, which might be old.
+			// It is possible to copy host channels, but I would prefer non-flake nix just to be unsupported.
+			"--no-channel-copy",
+			"--root",
+			"/mnt",
+		]);
+		if let Err(e) = cmd.sudo().run().await {
+			error!("failed to execute nixos-install: {e}");
 			failed = true;
 		}
-	}
+	} else {
+		if action.should_switch_profile() && !failed {
+			info!("switching system profile generation");
 
-	// FIXME: Connection might be disconnected after activation run
+			// To avoid even more problems, using nixos-install for now.
+			// // nix build is unable to work with --store argument for some reason, and nix until 2.26 didn't support copy with --profile argument,
+			// // falling back to using nix-env command
+			// // After stable NixOS starts using 2.26 - use `nix --store /mnt copy --from /mnt --profile ...` here, and instead of nix build below.
+			// let mut cmd = host.cmd("nix-env").await?;
+			// cmd.args([
+			// 	"--store",
+			// 	"/mnt",
+			// 	"--profile",
+			// 	"/mnt/nix/var/nix/profiles/system",
+			// 	"--set",
+			// ])
+			// .arg(&built);
+			// if let Err(e) = cmd.sudo().run_nix().await {
+			// 	error!("failed to switch system profile generation: {e}");
+			// 	failed = true;
+			// }
+			// It would also be possible to update profile atomically during copy:
+			// https://github.com/NixOS/nix/pull/11657
+			let mut cmd = host.nix_cmd().await?;
+			cmd.arg("build");
+			cmd.comparg("--profile", "/nix/var/nix/profiles/system");
+			cmd.arg(&built);
+			if let Err(e) = cmd.sudo().run_nix().await {
+				error!("failed to switch system profile generation: {e}");
+				failed = true;
+			}
+		}
 
-	if action.should_activate() && !failed {
-		let _span = info_span!("activating").entered();
-		info!("executing activation script");
-		let specialised = if let Some(specialisation) = specialisation {
-			let mut specialised = built.join("specialisation");
-			specialised.push(specialisation);
-			specialised
-		} else {
-			built.clone()
-		};
-		let switch_script = specialised.join("bin/switch-to-configuration");
-		let mut cmd = host.cmd(switch_script).in_current_span().await?;
-		cmd.arg(action.name().expect("upload.should_activate == false"));
-		if let Err(e) = cmd.sudo().run().in_current_span().await {
-			error!("failed to activate: {e}");
-			failed = true;
+		// FIXME: Connection might be disconnected after activation run
+
+		if action.should_activate() && !failed {
+			let _span = info_span!("activating").entered();
+			info!("executing activation script");
+			let specialised = if let Some(specialisation) = specialisation {
+				let mut specialised = built.join("specialisation");
+				specialised.push(specialisation);
+				specialised
+			} else {
+				built.clone()
+			};
+			let switch_script = specialised.join("bin/switch-to-configuration");
+			let mut cmd = host.cmd(switch_script).in_current_span().await?;
+			cmd.arg(action.name().expect("upload.should_activate == false"));
+			if let Err(e) = cmd.sudo().run().in_current_span().await {
+				error!("failed to activate: {e}");
+				failed = true;
+			}
 		}
 	}
 	if action.should_create_rollback_marker() {
@@ -330,24 +372,6 @@
 		drop(batch);
 		set.await;
 		Ok(())
-	}
-}
-
-#[derive(Clone, PartialEq, Copy)]
-enum DeployKind {
-	// NixOS => NixOS managed by fleet
-	UpgradeToFleet,
-	// NixOS managed by fleet => NixOS managed by fleet
-	Fleet,
-}
-impl FromStr for DeployKind {
-	type Err = anyhow::Error;
-	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
-		match s {
-			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),
-			"fleet" => Ok(Self::Fleet),
-			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\""),
-		}
 	}
 }
 
@@ -367,8 +391,9 @@
 			let local_host = config.local_host();
 			let opts = opts.clone();
 			let batch = batch.clone();
-			let mut deploy_kind: Option<DeployKind> =
-				opts.action_attr(&host, "deploy_kind").await?;
+			if let Some(deploy_kind) = opts.action_attr::<DeployKind>(&host, "deploy_kind").await? {
+				host.set_deploy_kind(deploy_kind);
+			};
 
 			set.spawn_local(
 				(async move {
@@ -381,28 +406,14 @@
 								return;
 							}
 						};
-					if deploy_kind == None {
-						let is_fleet_managed = match host.file_exists("/etc/FLEET_HOST").await {
-							Ok(v) => v,
-							Err(e) => {
-								error!("failed to query remote system kind: {}", e);
-								return;
-							},
-						};
-						if !is_fleet_managed {
-							error!(indoc::indoc!{"
-								host is not marked as managed by fleet
-								if you're not trying to lustrate/install system from scratch,
-								you should either
-									1. manually create /etc/FLEET_HOST file on the target host,
-									2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet
-									3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos
-							"});
+
+					let deploy_kind = match host.deploy_kind().await {
+						Ok(v) => v,
+						Err(e) => {
+							error!("failed to query target deploy kind: {e}");
 							return;
 						}
-						deploy_kind = Some(DeployKind::Fleet);
-					}
-					let deploy_kind = deploy_kind.expect("deploy_kind is set");
+					};
 
 					// TODO: Make disable_rollback a host attribute instead
 					let mut disable_rollback = self.disable_rollback;
modifiedcrates/fleet-base/Cargo.tomldiffbeforeafterboth
--- a/crates/fleet-base/Cargo.toml
+++ b/crates/fleet-base/Cargo.toml
@@ -13,6 +13,7 @@
 fleet-shared.workspace = true
 futures = "0.3.30"
 hostname = "0.4.0"
+indoc = "2.0.6"
 itertools = "0.13.0"
 nix-eval.workspace = true
 nixlike.workspace = true
modifiedcrates/fleet-base/src/host.rsdiffbeforeafterboth
58 Su,58 Su,
59}59}
6060
/// The kind of deployment fleet performs against a host.
///
/// The textual forms accepted for each variant are defined by the `FromStr` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeployKind {
	/// NixOS => NixOS managed by fleet
	UpgradeToFleet,
	/// NixOS managed by fleet => NixOS managed by fleet
	Fleet,
	/// Remote host has /mnt, /mnt/boot mounted,
	/// generated config is added to fleet configuration.
	NixosInstall,
}
71
72impl FromStr for DeployKind {
73 type Err = anyhow::Error;
74 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
75 match s {
76 "upgrade-to-fleet" => Ok(Self::UpgradeToFleet),
77 "fleet" => Ok(Self::Fleet),
78 "nixos-install" => Ok(Self::NixosInstall),
79 v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),
80 }
81 }
82}
61pub struct ConfigHost {83pub struct ConfigHost {
62 config: Config,84 config: Config,
63 pub name: String,85 pub name: String,
64 groups: OnceCell<Vec<String>>,86 groups: OnceCell<Vec<String>>,
87
88 deploy_kind: OnceCell<DeployKind>,
6589
66 pub host_config: Option<Value>,90 pub host_config: Option<Value>,
67 pub nixos_config: OnceCell<Value>,91 pub nixos_config: OnceCell<Value>,
73}97}
74// TODO: Move command helpers away with connectivity refactor98// TODO: Move command helpers away with connectivity refactor
75impl ConfigHost {99impl ConfigHost {
100 pub fn set_deploy_kind(&self, kind: DeployKind) {
101 self.deploy_kind
102 .set(kind)
103 .ok()
104 .expect("deploy kind is already set");
105 }
106 pub async fn deploy_kind(&self) -> Result<DeployKind> {
107 if let Some(kind) = self.deploy_kind.get() {
108 return Ok(kind.clone());
109 }
110 let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {
111 Ok(v) => v,
112 Err(e) => {
113 bail!("failed to query remote system kind: {}", e);
114 }
115 };
116 if !is_fleet_managed {
117 bail!(indoc::indoc! {"
118 host is not marked as managed by fleet
119 if you're not trying to lustrate/install system from scratch,
120 you should either
121 1. manually create /etc/FLEET_HOST file on the target host,
122 2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet
123 3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos
124 "});
125 }
126 // TOCTOU is possible
127 let _ = self.deploy_kind.set(DeployKind::Fleet);
128 Ok(self
129 .deploy_kind
130 .get()
131 .expect("deploy kind is just set")
132 .clone())
133 }
76 pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {134 pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {
77 // Prefer sudo, as run0 has some gotchas with polkit135 // Prefer sudo, as run0 has some gotchas with polkit
78 // and too many repeating prompts.136 // and too many repeating prompts.
189 Ok(MyCommand::new_on(escalation, cmd, session))247 Ok(MyCommand::new_on(escalation, cmd, session))
190 }248 }
191 }249 }
250 pub async fn nix_cmd(&self) -> Result<MyCommand> {
251 let mut nix = self.cmd("nix").await?;
252 nix.args([
253 "--extra-experimental-features",
254 "nix-command",
255 "--extra-experimental-features",
256 "flakes",
257 ]);
258 Ok(nix)
259 }
192260
193 pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {261 pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {
194 ensure!(data.encrypted, "secret is not encrypted");262 ensure!(data.encrypted, "secret is not encrypted");
231 EscalationStrategy::Su,299 EscalationStrategy::Su,
232 "nix",300 "nix",
233 );301 );
234 nix.arg("copy")302 nix.arg("copy").arg("--substitute-on-destination");
235 .arg("--substitute-on-destination")303
304 match self.deploy_kind().await? {
305 DeployKind::Fleet | DeployKind::UpgradeToFleet => {
236 .comparg("--to", format!("ssh-ng://{}", self.name))306 nix.comparg("--to", format!("ssh-ng://{}", self.name));
307 }
308 DeployKind::NixosInstall => {
309 nix
310 // Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon
311 .arg("--no-check-sigs")
312 .comparg(
313 "--to",
314 format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),
315 );
316 }
317 }
237 .arg(path);318 nix.arg(path);
238 nix.run_nix().await.context("nix copy")?;319 nix.run_nix().await.context("nix copy")?;
239 Ok(path.to_owned())320 Ok(path.to_owned())
240 }321 }
354435
355 local: true,436 local: true,
356 session: OnceLock::new(),437 session: OnceLock::new(),
438 deploy_kind: OnceCell::new(),
357 }439 }
358 }440 }
359441
372 // TODO: Remove with connectivity refactor454 // TODO: Remove with connectivity refactor
373 local: self.localhost == name,455 local: self.localhost == name,
374 session: OnceLock::new(),456 session: OnceLock::new(),
457 deploy_kind: OnceCell::new(),
375 })458 })
376 }459 }
377 pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {460 pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {
modifiedmodules/nixos/meta.nixdiffbeforeafterboth
--- a/modules/nixos/meta.nix
+++ b/modules/nixos/meta.nix
@@ -13,5 +13,13 @@
   ];
 
   # Version of environment (fleet scripts such as rollback) already installed on the host
-  config.environment.etc.FLEET_HOST.text = "1";
+  config = {
+    environment.etc.FLEET_HOST.text = "1";
+
+    # Flake/nix command support is assumed by fleet, let's add it here to avoid potential problems.
+    nix.settings.experimental-features = [
+      "nix-command"
+      "flakes"
+    ];
+  };
 }