git.delta.rocks / jrsonnet / refs/commits / 3627c6c6df00

difftreelog

feat nixos-install target

Lach2025-04-06parent: #3972fee.patch.diff
in: trunk

6 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -924,7 +924,6 @@
  "hostname",
  "human-repr",
  "indicatif",
- "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
@@ -958,6 +957,7 @@
  "fleet-shared",
  "futures",
  "hostname",
+ "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
modifiedcmds/fleet/Cargo.tomldiffbeforeafterboth
--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -47,7 +47,6 @@
 nix-eval.workspace = true
 nom = "7.1.3"
 fleet-base = { version = "0.1.0", path = "../../crates/fleet-base" }
-indoc = "2.0.6"
 
 [features]
 default = ["indicatif"]
modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
--- a/cmds/fleet/src/cmds/build_systems.rs
+++ b/cmds/fleet/src/cmds/build_systems.rs
@@ -1,9 +1,9 @@
-use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, str::FromStr, time::Duration};
+use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};
 
 use anyhow::{anyhow, bail, Result};
 use clap::{Parser, ValueEnum};
 use fleet_base::{
-	host::{Config, ConfigHost},
+	host::{Config, ConfigHost, DeployKind},
 	opts::FleetOpts,
 };
 use itertools::Itertools as _;
@@ -131,6 +131,13 @@
 	specialisation: Option<String>,
 	disable_rollback: bool,
 ) -> Result<()> {
+	let deploy_kind = host.deploy_kind().await?;
+	if deploy_kind == DeployKind::NixosInstall
+		&& !matches!(action, DeployAction::Boot | DeployAction::Upload)
+	{
+		bail!("nixos-install deploy kind only supports boot and upload actions");
+	}
+
 	let mut failed = false;
 
 	// TODO: Lockfile, to prevent concurrent system switch?
@@ -177,39 +184,74 @@
 			}
 		}
 	}
-
-	if action.should_switch_profile() && !failed {
-		info!("switching system profile generation");
-		// It would also be possible to update profile atomically during copy:
-		// https://github.com/NixOS/nix/pull/11657
-		let mut cmd = host.cmd("nix").await?;
-		cmd.arg("build");
-		cmd.comparg("--profile", "/nix/var/nix/profiles/system");
-		cmd.arg(&built);
-		if let Err(e) = cmd.sudo().run_nix().await {
-			error!("failed to switch system profile generation: {e}");
+	if deploy_kind == DeployKind::NixosInstall {
+		info!(
+			"running nixos-install to switch profile, install bootloader, and perform activation"
+		);
+		let mut cmd = host.cmd("nixos-install").await?;
+		cmd.arg("--system").arg(&built).args([
+			// Channels here aren't fleet host system channels, but channels embedded in installation cd, which might be old.
+			// It is possible to copy host channels, but I would prefer non-flake nix just to be unsupported.
+			"--no-channel-copy",
+			"--root",
+			"/mnt",
+		]);
+		if let Err(e) = cmd.sudo().run().await {
+			error!("failed to execute nixos-install: {e}");
 			failed = true;
 		}
-	}
+	} else {
+		if action.should_switch_profile() && !failed {
+			info!("switching system profile generation");
 
-	// FIXME: Connection might be disconnected after activation run
+			// To avoid even more problems, using nixos-install for now.
+			// // nix build is unable to work with --store argument for some reason, and nix until 2.26 didn't support copy with --profile argument,
+			// // falling back to using nix-env command
+			// // After stable NixOS starts using 2.26 - use `nix --store /mnt copy --from /mnt --profile ...` here, and instead of nix build below.
+			// let mut cmd = host.cmd("nix-env").await?;
+			// cmd.args([
+			// 	"--store",
+			// 	"/mnt",
+			// 	"--profile",
+			// 	"/mnt/nix/var/nix/profiles/system",
+			// 	"--set",
+			// ])
+			// .arg(&built);
+			// if let Err(e) = cmd.sudo().run_nix().await {
+			// 	error!("failed to switch system profile generation: {e}");
+			// 	failed = true;
+			// }
+			// It would also be possible to update profile atomically during copy:
+			// https://github.com/NixOS/nix/pull/11657
+			let mut cmd = host.nix_cmd().await?;
+			cmd.arg("build");
+			cmd.comparg("--profile", "/nix/var/nix/profiles/system");
+			cmd.arg(&built);
+			if let Err(e) = cmd.sudo().run_nix().await {
+				error!("failed to switch system profile generation: {e}");
+				failed = true;
+			}
+		}
 
-	if action.should_activate() && !failed {
-		let _span = info_span!("activating").entered();
-		info!("executing activation script");
-		let specialised = if let Some(specialisation) = specialisation {
-			let mut specialised = built.join("specialisation");
-			specialised.push(specialisation);
-			specialised
-		} else {
-			built.clone()
-		};
-		let switch_script = specialised.join("bin/switch-to-configuration");
-		let mut cmd = host.cmd(switch_script).in_current_span().await?;
-		cmd.arg(action.name().expect("upload.should_activate == false"));
-		if let Err(e) = cmd.sudo().run().in_current_span().await {
-			error!("failed to activate: {e}");
-			failed = true;
+		// FIXME: Connection might be disconnected after activation run
+
+		if action.should_activate() && !failed {
+			let _span = info_span!("activating").entered();
+			info!("executing activation script");
+			let specialised = if let Some(specialisation) = specialisation {
+				let mut specialised = built.join("specialisation");
+				specialised.push(specialisation);
+				specialised
+			} else {
+				built.clone()
+			};
+			let switch_script = specialised.join("bin/switch-to-configuration");
+			let mut cmd = host.cmd(switch_script).in_current_span().await?;
+			cmd.arg(action.name().expect("upload.should_activate == false"));
+			if let Err(e) = cmd.sudo().run().in_current_span().await {
+				error!("failed to activate: {e}");
+				failed = true;
+			}
 		}
 	}
 	if action.should_create_rollback_marker() {
@@ -330,24 +372,6 @@
 		drop(batch);
 		set.await;
 		Ok(())
-	}
-}
-
-#[derive(Clone, PartialEq, Copy)]
-enum DeployKind {
-	// NixOS => NixOS managed by fleet
-	UpgradeToFleet,
-	// NixOS managed by fleet => NixOS managed by fleet
-	Fleet,
-}
-impl FromStr for DeployKind {
-	type Err = anyhow::Error;
-	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
-		match s {
-			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),
-			"fleet" => Ok(Self::Fleet),
-			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\""),
-		}
 	}
 }
 
@@ -367,8 +391,9 @@
 			let local_host = config.local_host();
 			let opts = opts.clone();
 			let batch = batch.clone();
-			let mut deploy_kind: Option<DeployKind> =
-				opts.action_attr(&host, "deploy_kind").await?;
+			if let Some(deploy_kind) = opts.action_attr::<DeployKind>(&host, "deploy_kind").await? {
+				host.set_deploy_kind(deploy_kind);
+			};
 
 			set.spawn_local(
 				(async move {
@@ -381,28 +406,14 @@
 								return;
 							}
 						};
-					if deploy_kind == None {
-						let is_fleet_managed = match host.file_exists("/etc/FLEET_HOST").await {
-							Ok(v) => v,
-							Err(e) => {
-								error!("failed to query remote system kind: {}", e);
-								return;
-							},
-						};
-						if !is_fleet_managed {
-							error!(indoc::indoc!{"
-								host is not marked as managed by fleet
-								if you're not trying to lustrate/install system from scratch,
-								you should either
-									1. manually create /etc/FLEET_HOST file on the target host,
-									2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet
-									3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos
-							"});
+
+					let deploy_kind = match host.deploy_kind().await {
+						Ok(v) => v,
+						Err(e) => {
+							error!("failed to query target deploy kind: {e}");
 							return;
 						}
-						deploy_kind = Some(DeployKind::Fleet);
-					}
-					let deploy_kind = deploy_kind.expect("deploy_kind is set");
+					};
 
 					// TODO: Make disable_rollback a host attribute instead
 					let mut disable_rollback = self.disable_rollback;
modifiedcrates/fleet-base/Cargo.tomldiffbeforeafterboth
--- a/crates/fleet-base/Cargo.toml
+++ b/crates/fleet-base/Cargo.toml
@@ -13,6 +13,7 @@
 fleet-shared.workspace = true
 futures = "0.3.30"
 hostname = "0.4.0"
+indoc = "2.0.6"
 itertools = "0.13.0"
 nix-eval.workspace = true
 nixlike.workspace = true
modifiedcrates/fleet-base/src/host.rsdiffbeforeafterboth
--- a/crates/fleet-base/src/host.rs
+++ b/crates/fleet-base/src/host.rs
@@ -58,11 +58,35 @@
 	Su,
 }
 
+#[derive(Clone, PartialEq, Copy)]
+pub enum DeployKind {
+	/// Plain NixOS host that is being converted into a NixOS host managed by fleet.
+	UpgradeToFleet,
+	/// Host that is already managed by fleet and stays managed by fleet.
+	Fleet,
+	/// Installation onto a target root: the remote host has /mnt and /mnt/boot mounted,
+	/// and the generated config is added to the fleet configuration.
+	NixosInstall,
+}
+
+impl FromStr for DeployKind {
+	type Err = anyhow::Error;
+	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+		match s { // only the exact kebab-case spellings below are accepted
+			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),
+			"fleet" => Ok(Self::Fleet),
+			"nixos-install" => Ok(Self::NixosInstall),
+			v => bail!("unknown deploy_kind: {v}; expected one of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),
+		}
+	}
+}
 pub struct ConfigHost {
 	config: Config,
 	pub name: String,
 	groups: OnceCell<Vec<String>>,
 
+	deploy_kind: OnceCell<DeployKind>,
+
 	pub host_config: Option<Value>,
 	pub nixos_config: OnceCell<Value>,
 	pub pkgs_override: Option<Value>,
@@ -73,6 +97,40 @@
 }
 // TODO: Move command helpers away with connectivity refactor
 impl ConfigHost {
+	pub fn set_deploy_kind(&self, kind: DeployKind) {
+		// The kind can be pinned only once; a second call panics.
+		if self.deploy_kind.set(kind).is_err() {
+			panic!("deploy kind is already set");
+		}
+	}
+	/// Deploy kind of this host: the value pinned via `set_deploy_kind`, or, when none is set,
+	/// `DeployKind::Fleet` if `/etc/FLEET_HOST` exists on the host (the result is then cached).
+	///
+	/// Errors if the existence check fails or the host is not marked as managed by fleet.
+	pub async fn deploy_kind(&self) -> Result<DeployKind> {
+		if let Some(kind) = self.deploy_kind.get() {
+			return Ok(*kind);
+		}
+		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {
+			Ok(v) => v,
+			Err(e) => {
+				bail!("failed to query remote system kind: {}", e);
+			}
+		};
+		if !is_fleet_managed {
+			bail!(indoc::indoc! {"
+				host is not marked as managed by fleet
+				if you're not trying to lustrate/install system from scratch,
+				you should either
+					1. manually create /etc/FLEET_HOST file on the target host,
+					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet
+					3. use ?deploy_kind=upgrade-to-fleet if you're upgrading from plain nixos to fleet-managed nixos
+			"});
+		}
+		// TOCTOU is possible: the set result is ignored and whatever value ended up stored is returned.
+		let _ = self.deploy_kind.set(DeployKind::Fleet);
+		Ok(*self.deploy_kind.get().expect("deploy kind is just set"))
+	}
 	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {
 		// Prefer sudo, as run0 has some gotchas with polkit
 		// and too many repeating prompts.
@@ -189,6 +247,16 @@
 			Ok(MyCommand::new_on(escalation, cmd, session))
 		}
 	}
+	/// Prepares a `nix` command for this host with `nix-command` and `flakes` enabled via flags.
+	pub async fn nix_cmd(&self) -> Result<MyCommand> {
+		// Passed explicitly, presumably so the call works even if the target's nix config lacks them.
+		const FEATURES: [&str; 2] = ["nix-command", "flakes"];
+		let mut command = self.cmd("nix").await?;
+		for feature in FEATURES {
+			command.arg("--extra-experimental-features").arg(feature);
+		}
+		Ok(command)
+	}
 
 	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {
 		ensure!(data.encrypted, "secret is not encrypted");
@@ -231,10 +299,23 @@
 			EscalationStrategy::Su,
 			"nix",
 		);
-		nix.arg("copy")
-			.arg("--substitute-on-destination")
-			.comparg("--to", format!("ssh-ng://{}", self.name))
-			.arg(path);
+		nix.arg("copy").arg("--substitute-on-destination");
+
+		match self.deploy_kind().await? {
+			DeployKind::Fleet | DeployKind::UpgradeToFleet => {
+				nix.comparg("--to", format!("ssh-ng://{}", self.name));
+			}
+			DeployKind::NixosInstall => {
+				nix
+					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon
+					.arg("--no-check-sigs")
+					.comparg(
+						"--to",
+						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),
+					);
+			}
+		}
+		nix.arg(path);
 		nix.run_nix().await.context("nix copy")?;
 		Ok(path.to_owned())
 	}
@@ -354,6 +435,7 @@
 
 			local: true,
 			session: OnceLock::new(),
+			deploy_kind: OnceCell::new(),
 		}
 	}
 
@@ -372,6 +454,7 @@
 			// TODO: Remove with connectivit refactor
 			local: self.localhost == name,
 			session: OnceLock::new(),
+			deploy_kind: OnceCell::new(),
 		})
 	}
 	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {
modifiedmodules/nixos/meta.nixdiffbeforeafterboth
after · modules/nixos/meta.nix
{ lib, ... }:
let
  inherit (lib.modules) mkRemovedOptionModule;
in
{
  imports = [
    (mkRemovedOptionModule [ "tags" ]
      "tags are now defined at the host level, not the nixos system level for fast filtering without evaluating unnecessary hosts."
    )
    (mkRemovedOptionModule [
      "network"
    ] "network is now defined at the host level, not the nixos system level")
  ];

  # Version of environment (fleet scripts such as rollback) already installed on the host
  config = {
    environment.etc.FLEET_HOST.text = "1";

    # Flake/nix command support is assumed by fleet, lets add it here to avoid potential problems.
    nix.settings.experimental-features = [
      "nix-command"
      "flakes"
    ];
  };
}