git.delta.rocks / jrsonnet / refs/commits / 3627c6c6df00

difftreelog

feat nixos-install target

Lach2025-04-05parent: #3972fee.patch.diff
in: trunk

6 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -924,7 +924,6 @@
  "hostname",
  "human-repr",
  "indicatif",
- "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
@@ -958,6 +957,7 @@
  "fleet-shared",
  "futures",
  "hostname",
+ "indoc",
  "itertools 0.13.0",
  "nix-eval",
  "nixlike",
modifiedcmds/fleet/Cargo.tomldiffbeforeafterboth
--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -47,7 +47,6 @@
 nix-eval.workspace = true
 nom = "7.1.3"
 fleet-base = { version = "0.1.0", path = "../../crates/fleet-base" }
-indoc = "2.0.6"
 
 [features]
 default = ["indicatif"]
modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
--- a/cmds/fleet/src/cmds/build_systems.rs
+++ b/cmds/fleet/src/cmds/build_systems.rs
@@ -1,9 +1,9 @@
-use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, str::FromStr, time::Duration};
+use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};
 
 use anyhow::{anyhow, bail, Result};
 use clap::{Parser, ValueEnum};
 use fleet_base::{
-	host::{Config, ConfigHost},
+	host::{Config, ConfigHost, DeployKind},
 	opts::FleetOpts,
 };
 use itertools::Itertools as _;
@@ -131,6 +131,13 @@
 	specialisation: Option<String>,
 	disable_rollback: bool,
 ) -> Result<()> {
+	let deploy_kind = host.deploy_kind().await?;
+	if deploy_kind == DeployKind::NixosInstall
+		&& !matches!(action, DeployAction::Boot | DeployAction::Upload)
+	{
+		bail!("nixos-install deploy kind only supports boot and upload actions");
+	}
+
 	let mut failed = false;
 
 	// TODO: Lockfile, to prevent concurrent system switch?
@@ -177,39 +184,74 @@
 			}
 		}
 	}
-
-	if action.should_switch_profile() && !failed {
-		info!("switching system profile generation");
-		// It would also be possible to update profile atomically during copy:
-		// https://github.com/NixOS/nix/pull/11657
-		let mut cmd = host.cmd("nix").await?;
-		cmd.arg("build");
-		cmd.comparg("--profile", "/nix/var/nix/profiles/system");
-		cmd.arg(&built);
-		if let Err(e) = cmd.sudo().run_nix().await {
-			error!("failed to switch system profile generation: {e}");
+	if deploy_kind == DeployKind::NixosInstall {
+		info!(
+			"running nixos-install to switch profile, install bootloader, and perform activation"
+		);
+		let mut cmd = host.cmd("nixos-install").await?;
+		cmd.arg("--system").arg(&built).args([
+			// The channels here aren't the fleet host's system channels, but the channels embedded in the installation CD, which might be outdated.
+			// It is possible to copy the host's channels instead, but I would prefer non-flake nix to simply be unsupported.
+			"--no-channel-copy",
+			"--root",
+			"/mnt",
+		]);
+		if let Err(e) = cmd.sudo().run().await {
+			error!("failed to execute nixos-install: {e}");
 			failed = true;
 		}
-	}
+	} else {
+		if action.should_switch_profile() && !failed {
+			info!("switching system profile generation");
 
-	// FIXME: Connection might be disconnected after activation run
+			// To avoid even more problems, nixos-install is used for now; the nix-env approach below is kept for reference.
+			// // `nix build` is unable to work with the --store argument for some reason, and nix before 2.26 didn't support `copy` with the --profile argument,
+			// // so fall back to using the nix-env command.
+			// // Once stable NixOS starts shipping nix 2.26, use `nix --store /mnt copy --from /mnt --profile ...` here, and also in place of the `nix build` below.
+			// let mut cmd = host.cmd("nix-env").await?;
+			// cmd.args([
+			// 	"--store",
+			// 	"/mnt",
+			// 	"--profile",
+			// 	"/mnt/nix/var/nix/profiles/system",
+			// 	"--set",
+			// ])
+			// .arg(&built);
+			// if let Err(e) = cmd.sudo().run_nix().await {
+			// 	error!("failed to switch system profile generation: {e}");
+			// 	failed = true;
+			// }
+			// It would also be possible to update profile atomically during copy:
+			// https://github.com/NixOS/nix/pull/11657
+			let mut cmd = host.nix_cmd().await?;
+			cmd.arg("build");
+			cmd.comparg("--profile", "/nix/var/nix/profiles/system");
+			cmd.arg(&built);
+			if let Err(e) = cmd.sudo().run_nix().await {
+				error!("failed to switch system profile generation: {e}");
+				failed = true;
+			}
+		}
 
-	if action.should_activate() && !failed {
-		let _span = info_span!("activating").entered();
-		info!("executing activation script");
-		let specialised = if let Some(specialisation) = specialisation {
-			let mut specialised = built.join("specialisation");
-			specialised.push(specialisation);
-			specialised
-		} else {
-			built.clone()
-		};
-		let switch_script = specialised.join("bin/switch-to-configuration");
-		let mut cmd = host.cmd(switch_script).in_current_span().await?;
-		cmd.arg(action.name().expect("upload.should_activate == false"));
-		if let Err(e) = cmd.sudo().run().in_current_span().await {
-			error!("failed to activate: {e}");
-			failed = true;
+		// FIXME: Connection might be disconnected after activation run
+
+		if action.should_activate() && !failed {
+			let _span = info_span!("activating").entered();
+			info!("executing activation script");
+			let specialised = if let Some(specialisation) = specialisation {
+				let mut specialised = built.join("specialisation");
+				specialised.push(specialisation);
+				specialised
+			} else {
+				built.clone()
+			};
+			let switch_script = specialised.join("bin/switch-to-configuration");
+			let mut cmd = host.cmd(switch_script).in_current_span().await?;
+			cmd.arg(action.name().expect("upload.should_activate == false"));
+			if let Err(e) = cmd.sudo().run().in_current_span().await {
+				error!("failed to activate: {e}");
+				failed = true;
+			}
 		}
 	}
 	if action.should_create_rollback_marker() {
@@ -330,24 +372,6 @@
 		drop(batch);
 		set.await;
 		Ok(())
-	}
-}
-
-#[derive(Clone, PartialEq, Copy)]
-enum DeployKind {
-	// NixOS => NixOS managed by fleet
-	UpgradeToFleet,
-	// NixOS managed by fleet => NixOS managed by fleet
-	Fleet,
-}
-impl FromStr for DeployKind {
-	type Err = anyhow::Error;
-	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
-		match s {
-			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),
-			"fleet" => Ok(Self::Fleet),
-			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\""),
-		}
 	}
 }
 
@@ -367,8 +391,9 @@
 			let local_host = config.local_host();
 			let opts = opts.clone();
 			let batch = batch.clone();
-			let mut deploy_kind: Option<DeployKind> =
-				opts.action_attr(&host, "deploy_kind").await?;
+			if let Some(deploy_kind) = opts.action_attr::<DeployKind>(&host, "deploy_kind").await? {
+				host.set_deploy_kind(deploy_kind);
+			};
 
 			set.spawn_local(
 				(async move {
@@ -381,28 +406,14 @@
 								return;
 							}
 						};
-					if deploy_kind == None {
-						let is_fleet_managed = match host.file_exists("/etc/FLEET_HOST").await {
-							Ok(v) => v,
-							Err(e) => {
-								error!("failed to query remote system kind: {}", e);
-								return;
-							},
-						};
-						if !is_fleet_managed {
-							error!(indoc::indoc!{"
-								host is not marked as managed by fleet
-								if you're not trying to lustrate/install system from scratch,
-								you should either
-									1. manually create /etc/FLEET_HOST file on the target host,
-									2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet
-									3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos
-							"});
+
+					let deploy_kind = match host.deploy_kind().await {
+						Ok(v) => v,
+						Err(e) => {
+							error!("failed to query target deploy kind: {e}");
 							return;
 						}
-						deploy_kind = Some(DeployKind::Fleet);
-					}
-					let deploy_kind = deploy_kind.expect("deploy_kind is set");
+					};
 
 					// TODO: Make disable_rollback a host attribute instead
 					let mut disable_rollback = self.disable_rollback;
modifiedcrates/fleet-base/Cargo.tomldiffbeforeafterboth
--- a/crates/fleet-base/Cargo.toml
+++ b/crates/fleet-base/Cargo.toml
@@ -13,6 +13,7 @@
 fleet-shared.workspace = true
 futures = "0.3.30"
 hostname = "0.4.0"
+indoc = "2.0.6"
 itertools = "0.13.0"
 nix-eval.workspace = true
 nixlike.workspace = true
modifiedcrates/fleet-base/src/host.rsdiffbeforeafterboth
before · crates/fleet-base/src/host.rs
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	pub local_system: String,27	pub directory: PathBuf,28	pub data: Mutex<FleetData>,29	pub nix_args: Vec<OsString>,30	/// fleet_config.config31	pub config_field: Value,32	// TODO: Remove with connectivity refactor33	pub localhost: String,3435	/// import nixpkgs {system = local};36	pub default_pkgs: Value,37	pub nixpkgs: Value,3839	pub nix_session: NixSession,40}4142// TODO: Make field not pub43#[derive(Clone)]44pub struct Config(pub Arc<FleetConfigInternals>);4546impl Deref for Config {47	type Target = FleetConfigInternals;4849	fn deref(&self) -> &Self::Target {50		&self.051	}52}5354#[derive(Clone, Copy, Debug)]55pub enum EscalationStrategy {56	Sudo,57	Run0,58	Su,59}6061pub struct ConfigHost {62	config: Config,63	pub name: String,64	groups: OnceCell<Vec<String>>,6566	pub host_config: Option<Value>,67	pub nixos_config: OnceCell<Value>,68	pub pkgs_override: Option<Value>,6970	// TODO: Move command helpers away with connectivity refactor71	pub local: bool,72	pub session: OnceLock<Arc<openssh::Session>>,73}74// TODO: Move command helpers away with connectivity refactor75impl ConfigHost {76	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {77		// Prefer sudo, as run0 has some gotchas with polkit78		// and too many repeating prompts.79		if (self.find_in_path("sudo").await).is_ok() {80			return Ok(EscalationStrategy::Sudo);81		}82		if 
(self.find_in_path("run0").await).is_ok() {83			return Ok(EscalationStrategy::Run0);84		}85		Ok(EscalationStrategy::Su)86	}87	async fn open_session(&self) -> Result<Arc<openssh::Session>> {88		assert!(!self.local, "do not open ssh connection to local session");89		// FIXME: TOCTOU90		if let Some(session) = &self.session.get() {91			return Ok((*session).clone());92		};93		let session = SessionBuilder::default();94		let session = session95			.connect(&self.name)96			.await97			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;98		let session = Arc::new(session);99		self.session.set(session.clone()).expect("TOCTOU happened");100		Ok(session)101	}102	pub async fn mktemp_dir(&self) -> Result<String> {103		let mut cmd = self.cmd("mktemp").await?;104		cmd.arg("-d");105		let path = cmd.run_string().await?;106		Ok(path.trim_end().to_owned())107	}108	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {109		let mut cmd = self.cmd("sh").await?;110		cmd.arg("-c")111			.arg("test -e \"$1\" && echo true || echo false")112			.arg("_")113			.arg(path);114		Ok(cmd.run_value().await?)115	}116	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {117		let mut cmd = self.cmd("cat").await?;118		cmd.arg(path);119		cmd.run_bytes().await120	}121	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {122		let mut cmd = self.cmd("cat").await?;123		cmd.arg(path);124		cmd.run_string().await125	}126	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {127		let mut cmd = self.cmd("ls").await?;128		cmd.arg(path);129		let out = cmd.run_string().await?;130		let mut lines = out.split('\n');131		if let Some(last) = lines.next_back() {132			ensure!(last.is_empty(), "output of ls should end with newline");133		}134		Ok(lines.map(ToOwned::to_owned).collect())135	}136	#[allow(dead_code)]137	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> 
{138		let text = self.read_file_text(path).await?;139		Ok(serde_json::from_str(&text)?)140	}141	pub async fn read_env(&self, env: &str) -> Result<String> {142		let mut cmd = self.cmd("printenv").await?;143		cmd.arg(env);144		cmd.run_string().await145	}146	pub async fn find_in_path(&self, command: &str) -> Result<String> {147		// // `which` is not a part of coreutils, and it might not exist on machine.148		// let path = self.read_env("PATH").await?;149		// // Assuming delimiter is :, we don't work with windows host, this check will be much150		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)151		// for ele in path.split(':') {152		// 	let test_path = format!("{ele}/{cmd}");153		// 	test -x etc154		// }155		// let mut cmd = self.cmd("printenv").await?;156		// cmd.arg(env);157		// Ok(cmd.run_string().await?)158		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.159		let mut cmd = self160			.cmd_escalation(161				// Not used162				EscalationStrategy::Su,163				"which",164			)165			.await?;166		cmd.arg(command);167		cmd.run_string().await168	}169	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>170	where171		<D as FromStr>::Err: Display,172	{173		let text = self.read_file_text(path).await?;174		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))175	}176	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {177		self.cmd_escalation(self.escalation_strategy().await?, cmd)178			.await179	}180	pub async fn cmd_escalation(181		&self,182		escalation: EscalationStrategy,183		cmd: impl AsRef<OsStr>,184	) -> Result<MyCommand> {185		if self.local {186			Ok(MyCommand::new(escalation, cmd))187		} else {188			let session = self.open_session().await?;189			Ok(MyCommand::new_on(escalation, cmd, session))190		}191	}192193	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {194		
ensure!(data.encrypted, "secret is not encrypted");195		let mut cmd = self.cmd("fleet-install-secrets").await?;196		cmd.arg("decrypt").eqarg("--secret", data.to_string());197		let encoded = cmd198			.sudo()199			.run_string()200			.await201			.context("failed to call remote host for decrypt")?;202		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;203		ensure!(!data.encrypted, "secret came out encrypted");204		Ok(data.data)205	}206	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {207		ensure!(data.encrypted, "secret is not encrypted");208		let mut cmd = self.cmd("fleet-install-secrets").await?;209		cmd.arg("reencrypt").eqarg("--secret", data.to_string());210		for target in targets {211			let key = self.config.key(&target).await?;212			cmd.eqarg("--targets", key);213		}214		let encoded = cmd215			.sudo()216			.run_string()217			.await218			.context("failed to call remote host for decrypt")?;219		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;220		ensure!(data.encrypted, "secret came out not encrypted");221		Ok(data)222	}223	/// Returns path for futureproofing, as path might change i.e on conversion to CA224	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {225		if self.local {226			// Path is located locally, thus already trusted.227			return Ok(path.to_owned());228		}229		let mut nix = MyCommand::new(230			// Not used231			EscalationStrategy::Su,232			"nix",233		);234		nix.arg("copy")235			.arg("--substitute-on-destination")236			.comparg("--to", format!("ssh-ng://{}", self.name))237			.arg(path);238		nix.run_nix().await.context("nix copy")?;239		Ok(path.to_owned())240	}241	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {242		let mut cmd = self.cmd("systemctl").await?;243		cmd.arg("stop").arg(name);244		cmd.sudo().run().await245	}246	pub async fn systemctl_start(&self, name: &str) -> Result<()> {247		let mut cmd = self.cmd("systemctl").await?;248	
	cmd.arg("start").arg(name);249		cmd.sudo().run().await250	}251252	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {253		let mut cmd = self.cmd("rm").await?;254		cmd.arg("-f").arg(path);255		if sudo {256			cmd = cmd.sudo()257		}258		cmd.run().await259	}260}261impl ConfigHost {262	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,263	// assuming getting tags always returns the same value.264	pub async fn tags(&self) -> Result<Vec<String>> {265		if let Some(v) = self.groups.get() {266			return Ok(v.clone());267		}268		let Some(host_config) = &self.host_config else {269			return Ok(vec![]);270		};271		let tags: Vec<String> = nix_go_json!(host_config.tags);272273		let _ = self.groups.set(tags.clone());274275		Ok(tags)276	}277	pub async fn nixos_config(&self) -> Result<Value> {278		if let Some(v) = self.nixos_config.get() {279			return Ok(v.clone());280		}281		let Some(host_config) = &self.host_config else {282			bail!("local host has no nixos_config");283		};284		let nixos_config = nix_go!(host_config.nixos.config);285		assert_warn("nixos config evaluation", &nixos_config).await?;286287		let _ = self.nixos_config.set(nixos_config.clone());288289		Ok(nixos_config)290	}291292	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {293		let nixos = self.nixos_config().await?;294		let secrets = nix_go!(nixos.secrets);295		let mut out = Vec::new();296		for name in secrets.list_fields().await? 
{297			let secret = nix_go!(secrets[{ name }]);298			let is_shared: bool = nix_go_json!(secret.shared);299			if is_shared {300				continue;301			}302			out.push(name);303		}304		Ok(out)305	}306	pub async fn secret_field(&self, name: &str) -> Result<Value> {307		let nixos = self.nixos_config().await?;308		Ok(nix_go!(nixos.secrets[{ name }]))309	}310311	/// Packages for this host, resolved with nixpkgs overlays312	pub async fn pkgs(&self) -> Result<Value> {313		if let Some(value) = &self.pkgs_override {314			return Ok(value.clone());315		}316		let Some(host_config) = &self.host_config else {317			bail!("local host has no host_config");318		};319		// TODO: Should nixos.options be cached?320		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))321	}322}323324impl Config {325	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {326		let config = &self.config_field;327		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);328		Ok(tagged)329	}330	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {331		let mut out = BTreeSet::new();332		for owner in owners {333			if let Some(tag) = owner.strip_prefix('@') {334				let hosts = self.tagged_hostnames(tag).await?;335				out.extend(hosts);336			} else {337				out.insert(owner);338			}339		}340		Ok(out)341	}342	pub fn local_host(&self) -> ConfigHost {343		ConfigHost {344			config: self.clone(),345			name: "<virtual localhost>".to_owned(),346			host_config: None,347			nixos_config: OnceCell::new(),348			groups: {349				let cell = OnceCell::new();350				let _ = cell.set(vec![]);351				cell352			},353			pkgs_override: Some(self.default_pkgs.clone()),354355			local: true,356			session: OnceLock::new(),357		}358	}359360	pub async fn host(&self, name: &str) -> Result<ConfigHost> {361		let config = &self.config_field;362		let host_config = nix_go!(config.hosts[{ name }]);363364		Ok(ConfigHost {365			config: self.clone(),366			name: name.to_owned(),367			
host_config: Some(host_config),368			nixos_config: OnceCell::new(),369			groups: OnceCell::new(),370			pkgs_override: None,371372			// TODO: Remove with connectivit refactor373			local: self.localhost == name,374			session: OnceLock::new(),375		})376	}377	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {378		let config = &self.config_field;379		let names = nix_go!(config.hosts).list_fields().await?;380		let mut out = vec![];381		for name in names {382			out.push(self.host(&name).await?);383		}384		Ok(out)385	}386	// TODO: Replace usages with .host().nixos_config387	pub async fn system_config(&self, host: &str) -> Result<Value> {388		let fleet_field = &self.config_field;389		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))390	}391392	/// Shared secrets configured in fleet.nix or in flake393	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {394		let config_field = &self.config_field;395		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)396	}397	/// Shared secrets configured in fleet.nix398	pub fn list_shared(&self) -> Vec<String> {399		let data = self.data();400		data.shared_secrets.keys().cloned().collect()401	}402	pub fn has_shared(&self, name: &str) -> bool {403		let data = self.data();404		data.shared_secrets.contains_key(name)405	}406	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {407		let mut data = self.data_mut();408		data.shared_secrets.insert(name.to_owned(), shared);409	}410	pub fn remove_shared(&self, secret: &str) {411		let mut data = self.data_mut();412		data.shared_secrets.remove(secret);413	}414415	pub fn list_secrets(&self, host: &str) -> Vec<String> {416		let data = self.data();417		let Some(secrets) = data.host_secrets.get(host) else {418			return Vec::new();419		};420		secrets.keys().cloned().collect()421	}422423	pub fn has_secret(&self, host: &str, secret: &str) -> bool {424		let data = self.data();425		let Some(host_secrets) = data.host_secrets.get(host) else {426			return 
false;427		};428		host_secrets.contains_key(secret)429	}430	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {431		let mut data = self.data_mut();432		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();433		host_secrets.insert(secret, value);434	}435436	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {437		let data = self.data();438		let Some(host_secrets) = data.host_secrets.get(host) else {439			bail!("no secrets for machine {host}");440		};441		let Some(secret) = host_secrets.get(secret) else {442			bail!("machine {host} has no secret {secret}");443		};444		Ok(secret.clone())445	}446	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {447		let data = self.data();448		let Some(secret) = data.shared_secrets.get(secret) else {449			bail!("no shared secret {secret}");450		};451		Ok(secret.clone())452	}453	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {454		let config_field = &self.config_field;455		Ok(nix_go_json!(456			config_field.sharedSecrets[{ secret }].expectedOwners457		))458	}459460	// TODO: Should this be something modifiable from other processes?461	// E.g terraform provider might want to update FleetData (e.g secrets),462	// and current implementation assumes only one process holds current fleet.nix463	// Given that it is no longer needs to be a file for nix evaluation,464	// maybe it can be a .nix file for persistence, but accessible only465	// thru some shared state controller? 
Might it be stored in terraform466	// state provider?467	pub fn data(&self) -> MutexGuard<FleetData> {468		self.data.lock().unwrap()469	}470	pub fn data_mut(&self) -> MutexGuard<FleetData> {471		self.data.lock().unwrap()472	}473	pub fn save(&self) -> Result<()> {474		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;475		let data = nixlike::serialize(&self.data() as &FleetData)?;476		tempfile.write_all(477			format!(478				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",479				data480			)481			.as_bytes(),482		)?;483		let mut fleet_data_path = self.directory.clone();484		fleet_data_path.push("fleet.nix");485		tempfile.persist(fleet_data_path)?;486		Ok(())487	}488}
after · crates/fleet-base/src/host.rs
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	pub local_system: String,27	pub directory: PathBuf,28	pub data: Mutex<FleetData>,29	pub nix_args: Vec<OsString>,30	/// fleet_config.config31	pub config_field: Value,32	// TODO: Remove with connectivity refactor33	pub localhost: String,3435	/// import nixpkgs {system = local};36	pub default_pkgs: Value,37	pub nixpkgs: Value,3839	pub nix_session: NixSession,40}4142// TODO: Make field not pub43#[derive(Clone)]44pub struct Config(pub Arc<FleetConfigInternals>);4546impl Deref for Config {47	type Target = FleetConfigInternals;4849	fn deref(&self) -> &Self::Target {50		&self.051	}52}5354#[derive(Clone, Copy, Debug)]55pub enum EscalationStrategy {56	Sudo,57	Run0,58	Su,59}6061#[derive(Clone, PartialEq, Copy)]62pub enum DeployKind {63	/// NixOS => NixOS managed by fleet64	UpgradeToFleet,65	/// NixOS managed by fleet => NixOS managed by fleet66	Fleet,67	/// Remote host has /mnt, /mnt/boot mounted,68	/// generated config is added to fleet configuration.69	NixosInstall,70}7172impl FromStr for DeployKind {73	type Err = anyhow::Error;74	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {75		match s {76			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),77			"fleet" => Ok(Self::Fleet),78			"nixos-install" => Ok(Self::NixosInstall),79			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),80		}81	}82}83pub struct ConfigHost {84	config: 
Config,85	pub name: String,86	groups: OnceCell<Vec<String>>,8788	deploy_kind: OnceCell<DeployKind>,8990	pub host_config: Option<Value>,91	pub nixos_config: OnceCell<Value>,92	pub pkgs_override: Option<Value>,9394	// TODO: Move command helpers away with connectivity refactor95	pub local: bool,96	pub session: OnceLock<Arc<openssh::Session>>,97}98// TODO: Move command helpers away with connectivity refactor99impl ConfigHost {100	pub fn set_deploy_kind(&self, kind: DeployKind) {101		self.deploy_kind102			.set(kind)103			.ok()104			.expect("deploy kind is already set");105	}106	pub async fn deploy_kind(&self) -> Result<DeployKind> {107		if let Some(kind) = self.deploy_kind.get() {108			return Ok(kind.clone());109		}110		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {111			Ok(v) => v,112			Err(e) => {113				bail!("failed to query remote system kind: {}", e);114			}115		};116		if !is_fleet_managed {117			bail!(indoc::indoc! {"118				host is not marked as managed by fleet119				if you're not trying to lustrate/install system from scratch,120				you should either121					1. manually create /etc/FLEET_HOST file on the target host,122					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet123					3. 
use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos124			"});125		}126		// TOCTOU is possible127		let _ = self.deploy_kind.set(DeployKind::Fleet);128		Ok(self129			.deploy_kind130			.get()131			.expect("deploy kind is just set")132			.clone())133	}134	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {135		// Prefer sudo, as run0 has some gotchas with polkit136		// and too many repeating prompts.137		if (self.find_in_path("sudo").await).is_ok() {138			return Ok(EscalationStrategy::Sudo);139		}140		if (self.find_in_path("run0").await).is_ok() {141			return Ok(EscalationStrategy::Run0);142		}143		Ok(EscalationStrategy::Su)144	}145	async fn open_session(&self) -> Result<Arc<openssh::Session>> {146		assert!(!self.local, "do not open ssh connection to local session");147		// FIXME: TOCTOU148		if let Some(session) = &self.session.get() {149			return Ok((*session).clone());150		};151		let session = SessionBuilder::default();152		let session = session153			.connect(&self.name)154			.await155			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;156		let session = Arc::new(session);157		self.session.set(session.clone()).expect("TOCTOU happened");158		Ok(session)159	}160	pub async fn mktemp_dir(&self) -> Result<String> {161		let mut cmd = self.cmd("mktemp").await?;162		cmd.arg("-d");163		let path = cmd.run_string().await?;164		Ok(path.trim_end().to_owned())165	}166	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {167		let mut cmd = self.cmd("sh").await?;168		cmd.arg("-c")169			.arg("test -e \"$1\" && echo true || echo false")170			.arg("_")171			.arg(path);172		Ok(cmd.run_value().await?)173	}174	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {175		let mut cmd = self.cmd("cat").await?;176		cmd.arg(path);177		cmd.run_bytes().await178	}179	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {180		let mut cmd = 
self.cmd("cat").await?;181		cmd.arg(path);182		cmd.run_string().await183	}184	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {185		let mut cmd = self.cmd("ls").await?;186		cmd.arg(path);187		let out = cmd.run_string().await?;188		let mut lines = out.split('\n');189		if let Some(last) = lines.next_back() {190			ensure!(last.is_empty(), "output of ls should end with newline");191		}192		Ok(lines.map(ToOwned::to_owned).collect())193	}194	#[allow(dead_code)]195	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {196		let text = self.read_file_text(path).await?;197		Ok(serde_json::from_str(&text)?)198	}199	pub async fn read_env(&self, env: &str) -> Result<String> {200		let mut cmd = self.cmd("printenv").await?;201		cmd.arg(env);202		cmd.run_string().await203	}204	pub async fn find_in_path(&self, command: &str) -> Result<String> {205		// // `which` is not a part of coreutils, and it might not exist on machine.206		// let path = self.read_env("PATH").await?;207		// // Assuming delimiter is :, we don't work with windows host, this check will be much208		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)209		// for ele in path.split(':') {210		// 	let test_path = format!("{ele}/{cmd}");211		// 	test -x etc212		// }213		// let mut cmd = self.cmd("printenv").await?;214		// cmd.arg(env);215		// Ok(cmd.run_string().await?)216		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.217		let mut cmd = self218			.cmd_escalation(219				// Not used220				EscalationStrategy::Su,221				"which",222			)223			.await?;224		cmd.arg(command);225		cmd.run_string().await226	}227	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>228	where229		<D as FromStr>::Err: Display,230	{231		let text = self.read_file_text(path).await?;232		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: 
{e}"))233	}234	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {235		self.cmd_escalation(self.escalation_strategy().await?, cmd)236			.await237	}238	pub async fn cmd_escalation(239		&self,240		escalation: EscalationStrategy,241		cmd: impl AsRef<OsStr>,242	) -> Result<MyCommand> {243		if self.local {244			Ok(MyCommand::new(escalation, cmd))245		} else {246			let session = self.open_session().await?;247			Ok(MyCommand::new_on(escalation, cmd, session))248		}249	}250	pub async fn nix_cmd(&self) -> Result<MyCommand> {251		let mut nix = self.cmd("nix").await?;252		nix.args([253			"--extra-experimental-features",254			"nix-command",255			"--extra-experimental-features",256			"flakes",257		]);258		Ok(nix)259	}260261	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {262		ensure!(data.encrypted, "secret is not encrypted");263		let mut cmd = self.cmd("fleet-install-secrets").await?;264		cmd.arg("decrypt").eqarg("--secret", data.to_string());265		let encoded = cmd266			.sudo()267			.run_string()268			.await269			.context("failed to call remote host for decrypt")?;270		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;271		ensure!(!data.encrypted, "secret came out encrypted");272		Ok(data.data)273	}274	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {275		ensure!(data.encrypted, "secret is not encrypted");276		let mut cmd = self.cmd("fleet-install-secrets").await?;277		cmd.arg("reencrypt").eqarg("--secret", data.to_string());278		for target in targets {279			let key = self.config.key(&target).await?;280			cmd.eqarg("--targets", key);281		}282		let encoded = cmd283			.sudo()284			.run_string()285			.await286			.context("failed to call remote host for decrypt")?;287		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;288		ensure!(data.encrypted, "secret came out not encrypted");289		Ok(data)290	}291	/// Returns path for futureproofing, as path might change i.e on 
conversion to CA292	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {293		if self.local {294			// Path is located locally, thus already trusted.295			return Ok(path.to_owned());296		}297		let mut nix = MyCommand::new(298			// Not used299			EscalationStrategy::Su,300			"nix",301		);302		nix.arg("copy").arg("--substitute-on-destination");303304		match self.deploy_kind().await? {305			DeployKind::Fleet | DeployKind::UpgradeToFleet => {306				nix.comparg("--to", format!("ssh-ng://{}", self.name));307			}308			DeployKind::NixosInstall => {309				nix310					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon311					.arg("--no-check-sigs")312					.comparg(313						"--to",314						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),315					);316			}317		}318		nix.arg(path);319		nix.run_nix().await.context("nix copy")?;320		Ok(path.to_owned())321	}322	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {323		let mut cmd = self.cmd("systemctl").await?;324		cmd.arg("stop").arg(name);325		cmd.sudo().run().await326	}327	pub async fn systemctl_start(&self, name: &str) -> Result<()> {328		let mut cmd = self.cmd("systemctl").await?;329		cmd.arg("start").arg(name);330		cmd.sudo().run().await331	}332333	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {334		let mut cmd = self.cmd("rm").await?;335		cmd.arg("-f").arg(path);336		if sudo {337			cmd = cmd.sudo()338		}339		cmd.run().await340	}341}342impl ConfigHost {343	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,344	// assuming getting tags always returns the same value.345	pub async fn tags(&self) -> Result<Vec<String>> {346		if let Some(v) = self.groups.get() {347			return Ok(v.clone());348		}349		let Some(host_config) = &self.host_config else {350			return Ok(vec![]);351		};352		let tags: Vec<String> = 
nix_go_json!(host_config.tags);353354		let _ = self.groups.set(tags.clone());355356		Ok(tags)357	}358	pub async fn nixos_config(&self) -> Result<Value> {359		if let Some(v) = self.nixos_config.get() {360			return Ok(v.clone());361		}362		let Some(host_config) = &self.host_config else {363			bail!("local host has no nixos_config");364		};365		let nixos_config = nix_go!(host_config.nixos.config);366		assert_warn("nixos config evaluation", &nixos_config).await?;367368		let _ = self.nixos_config.set(nixos_config.clone());369370		Ok(nixos_config)371	}372373	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {374		let nixos = self.nixos_config().await?;375		let secrets = nix_go!(nixos.secrets);376		let mut out = Vec::new();377		for name in secrets.list_fields().await? {378			let secret = nix_go!(secrets[{ name }]);379			let is_shared: bool = nix_go_json!(secret.shared);380			if is_shared {381				continue;382			}383			out.push(name);384		}385		Ok(out)386	}387	pub async fn secret_field(&self, name: &str) -> Result<Value> {388		let nixos = self.nixos_config().await?;389		Ok(nix_go!(nixos.secrets[{ name }]))390	}391392	/// Packages for this host, resolved with nixpkgs overlays393	pub async fn pkgs(&self) -> Result<Value> {394		if let Some(value) = &self.pkgs_override {395			return Ok(value.clone());396		}397		let Some(host_config) = &self.host_config else {398			bail!("local host has no host_config");399		};400		// TODO: Should nixos.options be cached?401		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))402	}403}404405impl Config {406	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {407		let config = &self.config_field;408		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);409		Ok(tagged)410	}411	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {412		let mut out = BTreeSet::new();413		for owner in owners {414			if let Some(tag) = owner.strip_prefix('@') {415				let 
hosts = self.tagged_hostnames(tag).await?;416				out.extend(hosts);417			} else {418				out.insert(owner);419			}420		}421		Ok(out)422	}423	pub fn local_host(&self) -> ConfigHost {424		ConfigHost {425			config: self.clone(),426			name: "<virtual localhost>".to_owned(),427			host_config: None,428			nixos_config: OnceCell::new(),429			groups: {430				let cell = OnceCell::new();431				let _ = cell.set(vec![]);432				cell433			},434			pkgs_override: Some(self.default_pkgs.clone()),435436			local: true,437			session: OnceLock::new(),438			deploy_kind: OnceCell::new(),439		}440	}441442	pub async fn host(&self, name: &str) -> Result<ConfigHost> {443		let config = &self.config_field;444		let host_config = nix_go!(config.hosts[{ name }]);445446		Ok(ConfigHost {447			config: self.clone(),448			name: name.to_owned(),449			host_config: Some(host_config),450			nixos_config: OnceCell::new(),451			groups: OnceCell::new(),452			pkgs_override: None,453454			// TODO: Remove with connectivit refactor455			local: self.localhost == name,456			session: OnceLock::new(),457			deploy_kind: OnceCell::new(),458		})459	}460	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {461		let config = &self.config_field;462		let names = nix_go!(config.hosts).list_fields().await?;463		let mut out = vec![];464		for name in names {465			out.push(self.host(&name).await?);466		}467		Ok(out)468	}469	// TODO: Replace usages with .host().nixos_config470	pub async fn system_config(&self, host: &str) -> Result<Value> {471		let fleet_field = &self.config_field;472		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))473	}474475	/// Shared secrets configured in fleet.nix or in flake476	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {477		let config_field = &self.config_field;478		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)479	}480	/// Shared secrets configured in fleet.nix481	pub fn list_shared(&self) -> Vec<String> {482		let data = self.data();483		
data.shared_secrets.keys().cloned().collect()484	}485	pub fn has_shared(&self, name: &str) -> bool {486		let data = self.data();487		data.shared_secrets.contains_key(name)488	}489	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {490		let mut data = self.data_mut();491		data.shared_secrets.insert(name.to_owned(), shared);492	}493	pub fn remove_shared(&self, secret: &str) {494		let mut data = self.data_mut();495		data.shared_secrets.remove(secret);496	}497498	pub fn list_secrets(&self, host: &str) -> Vec<String> {499		let data = self.data();500		let Some(secrets) = data.host_secrets.get(host) else {501			return Vec::new();502		};503		secrets.keys().cloned().collect()504	}505506	pub fn has_secret(&self, host: &str, secret: &str) -> bool {507		let data = self.data();508		let Some(host_secrets) = data.host_secrets.get(host) else {509			return false;510		};511		host_secrets.contains_key(secret)512	}513	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {514		let mut data = self.data_mut();515		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();516		host_secrets.insert(secret, value);517	}518519	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {520		let data = self.data();521		let Some(host_secrets) = data.host_secrets.get(host) else {522			bail!("no secrets for machine {host}");523		};524		let Some(secret) = host_secrets.get(secret) else {525			bail!("machine {host} has no secret {secret}");526		};527		Ok(secret.clone())528	}529	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {530		let data = self.data();531		let Some(secret) = data.shared_secrets.get(secret) else {532			bail!("no shared secret {secret}");533		};534		Ok(secret.clone())535	}536	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {537		let config_field = &self.config_field;538		Ok(nix_go_json!(539			config_field.sharedSecrets[{ secret }].expectedOwners540		
))541	}542543	// TODO: Should this be something modifiable from other processes?544	// E.g terraform provider might want to update FleetData (e.g secrets),545	// and current implementation assumes only one process holds current fleet.nix546	// Given that it is no longer needs to be a file for nix evaluation,547	// maybe it can be a .nix file for persistence, but accessible only548	// thru some shared state controller? Might it be stored in terraform549	// state provider?550	pub fn data(&self) -> MutexGuard<FleetData> {551		self.data.lock().unwrap()552	}553	pub fn data_mut(&self) -> MutexGuard<FleetData> {554		self.data.lock().unwrap()555	}556	pub fn save(&self) -> Result<()> {557		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;558		let data = nixlike::serialize(&self.data() as &FleetData)?;559		tempfile.write_all(560			format!(561				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",562				data563			)564			.as_bytes(),565		)?;566		let mut fleet_data_path = self.directory.clone();567		fleet_data_path.push("fleet.nix");568		tempfile.persist(fleet_data_path)?;569		Ok(())570	}571}
modifiedmodules/nixos/meta.nixdiffbeforeafterboth
--- a/modules/nixos/meta.nix
+++ b/modules/nixos/meta.nix
@@ -13,5 +13,13 @@
   ];
 
   # Version of environment (fleet scripts such as rollback) already installed on the host
-  config.environment.etc.FLEET_HOST.text = "1";
+  config = {
+    environment.etc.FLEET_HOST.text = "1";
+
+    # Flake/nix command support is assumed by fleet, let's add it here to avoid potential problems.
+    nix.settings.experimental-features = [
+      "nix-command"
+      "flakes"
+    ];
+  };
 }