git.delta.rocks / jrsonnet / refs/commits / 33e3a6cc33fd

difftreelog

feat basic lustration helper

Lach2025-04-24parent: #bd11592.patch.diff
in: trunk

3 files changed

modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
--- a/cmds/fleet/src/cmds/build_systems.rs
+++ b/cmds/fleet/src/cmds/build_systems.rs
@@ -1,6 +1,6 @@
 use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};
 
-use anyhow::{anyhow, bail, Result};
+use anyhow::{anyhow, bail, Context, Result};
 use clap::{Parser, ValueEnum};
 use fleet_base::{
 	host::{Config, ConfigHost, DeployKind},
@@ -132,10 +132,10 @@
 	disable_rollback: bool,
 ) -> Result<()> {
 	let deploy_kind = host.deploy_kind().await?;
-	if deploy_kind == DeployKind::NixosInstall
+	if (deploy_kind == DeployKind::NixosInstall || deploy_kind == DeployKind::NixosLustrate)
 		&& !matches!(action, DeployAction::Boot | DeployAction::Upload)
 	{
-		bail!("nixos-install deploy kind only supports boot and upload actions");
+		bail!("{deploy_kind:?} deploy kind only supports boot and upload actions");
 	}
 
 	let mut failed = false;
@@ -184,6 +184,17 @@
 			}
 		}
 	}
+	if deploy_kind == DeployKind::NixosLustrate {
+		// Fleet could also create this file, but since this operation is potentially disruptive,
+		// make the user do it themselves.
+		if !host.file_exists("/etc/NIXOS_LUSTRATE").await? {
+			bail!("/etc/NIXOS_LUSTRATE should be created on remote host");
+		}
+		// Wanted by NixOS to recognize the system as NixOS.
+		let mut cmd = host.cmd("touch").await?;
+		cmd.arg("/etc/NIXOS");
+		cmd.sudo().run().await.context("creating /etc/NIXOS")?;
+	}
 	if deploy_kind == DeployKind::NixosInstall {
 		info!(
 			"running nixos-install to switch profile, install bootloader, and perform activation"
@@ -247,6 +258,9 @@
 			};
 			let switch_script = specialised.join("bin/switch-to-configuration");
 			let mut cmd = host.cmd(switch_script).in_current_span().await?;
+			if deploy_kind == DeployKind::NixosLustrate {
+				cmd.env("NIXOS_INSTALL_BOOTLOADER", "1");
+			}
 			cmd.env("FLEET_ONLINE_ACTIVATION", "1")
 				.arg(action.name().expect("upload.should_activate == false"));
 			if let Err(e) = cmd.sudo().run().in_current_span().await {
modifiedcrates/fleet-base/src/host.rsdiffbeforeafterboth
before · crates/fleet-base/src/host.rs
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	pub local_system: String,27	pub directory: PathBuf,28	pub data: Mutex<FleetData>,29	pub nix_args: Vec<OsString>,30	/// fleet_config.config31	pub config_field: Value,32	// TODO: Remove with connectivity refactor33	pub localhost: String,3435	/// import nixpkgs {system = local};36	pub default_pkgs: Value,37	pub nixpkgs: Value,3839	pub nix_session: NixSession,40}4142// TODO: Make field not pub43#[derive(Clone)]44pub struct Config(pub Arc<FleetConfigInternals>);4546impl Deref for Config {47	type Target = FleetConfigInternals;4849	fn deref(&self) -> &Self::Target {50		&self.051	}52}5354#[derive(Clone, Copy, Debug)]55pub enum EscalationStrategy {56	Sudo,57	Run0,58	Su,59}6061#[derive(Clone, PartialEq, Copy)]62pub enum DeployKind {63	/// NixOS => NixOS managed by fleet64	UpgradeToFleet,65	/// NixOS managed by fleet => NixOS managed by fleet66	Fleet,67	/// Remote host has /mnt, /mnt/boot mounted,68	/// generated config is added to fleet configuration.69	NixosInstall,70}7172impl FromStr for DeployKind {73	type Err = anyhow::Error;74	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {75		match s {76			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),77			"fleet" => Ok(Self::Fleet),78			"nixos-install" => Ok(Self::NixosInstall),79			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),80		}81	}82}83pub struct ConfigHost {84	config: 
Config,85	pub name: String,86	groups: OnceCell<Vec<String>>,8788	deploy_kind: OnceCell<DeployKind>,8990	pub host_config: Option<Value>,91	pub nixos_config: OnceCell<Value>,92	pub pkgs_override: Option<Value>,9394	// TODO: Move command helpers away with connectivity refactor95	pub local: bool,96	pub session: OnceLock<Arc<openssh::Session>>,97}98// TODO: Move command helpers away with connectivity refactor99impl ConfigHost {100	pub fn set_deploy_kind(&self, kind: DeployKind) {101		self.deploy_kind102			.set(kind)103			.ok()104			.expect("deploy kind is already set");105	}106	pub async fn deploy_kind(&self) -> Result<DeployKind> {107		if let Some(kind) = self.deploy_kind.get() {108			return Ok(kind.clone());109		}110		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {111			Ok(v) => v,112			Err(e) => {113				bail!("failed to query remote system kind: {}", e);114			}115		};116		if !is_fleet_managed {117			bail!(indoc::indoc! {"118				host is not marked as managed by fleet119				if you're not trying to lustrate/install system from scratch,120				you should either121					1. manually create /etc/FLEET_HOST file on the target host,122					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet123					3. 
use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos124			"});125		}126		// TOCTOU is possible127		let _ = self.deploy_kind.set(DeployKind::Fleet);128		Ok(self129			.deploy_kind130			.get()131			.expect("deploy kind is just set")132			.clone())133	}134	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {135		// Prefer sudo, as run0 has some gotchas with polkit136		// and too many repeating prompts.137		if (self.find_in_path("sudo").await).is_ok() {138			return Ok(EscalationStrategy::Sudo);139		}140		if (self.find_in_path("run0").await).is_ok() {141			return Ok(EscalationStrategy::Run0);142		}143		Ok(EscalationStrategy::Su)144	}145	async fn open_session(&self) -> Result<Arc<openssh::Session>> {146		assert!(!self.local, "do not open ssh connection to local session");147		// FIXME: TOCTOU148		if let Some(session) = &self.session.get() {149			return Ok((*session).clone());150		};151		let session = SessionBuilder::default();152		let session = session153			.connect(&self.name)154			.await155			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;156		let session = Arc::new(session);157		self.session.set(session.clone()).expect("TOCTOU happened");158		Ok(session)159	}160	pub async fn mktemp_dir(&self) -> Result<String> {161		let mut cmd = self.cmd("mktemp").await?;162		cmd.arg("-d");163		let path = cmd.run_string().await?;164		Ok(path.trim_end().to_owned())165	}166	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {167		let mut cmd = self.cmd("sh").await?;168		cmd.arg("-c")169			.arg("test -e \"$1\" && echo true || echo false")170			.arg("_")171			.arg(path);172		Ok(cmd.run_value().await?)173	}174	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {175		let mut cmd = self.cmd("cat").await?;176		cmd.arg(path);177		cmd.run_bytes().await178	}179	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {180		let mut cmd = 
self.cmd("cat").await?;181		cmd.arg(path);182		cmd.run_string().await183	}184	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {185		let mut cmd = self.cmd("ls").await?;186		cmd.arg(path);187		let out = cmd.run_string().await?;188		let mut lines = out.split('\n');189		if let Some(last) = lines.next_back() {190			ensure!(last.is_empty(), "output of ls should end with newline");191		}192		Ok(lines.map(ToOwned::to_owned).collect())193	}194	#[allow(dead_code)]195	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {196		let text = self.read_file_text(path).await?;197		Ok(serde_json::from_str(&text)?)198	}199	pub async fn read_env(&self, env: &str) -> Result<String> {200		let mut cmd = self.cmd("printenv").await?;201		cmd.arg(env);202		cmd.run_string().await203	}204	pub async fn find_in_path(&self, command: &str) -> Result<String> {205		// // `which` is not a part of coreutils, and it might not exist on machine.206		// let path = self.read_env("PATH").await?;207		// // Assuming delimiter is :, we don't work with windows host, this check will be much208		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)209		// for ele in path.split(':') {210		// 	let test_path = format!("{ele}/{cmd}");211		// 	test -x etc212		// }213		// let mut cmd = self.cmd("printenv").await?;214		// cmd.arg(env);215		// Ok(cmd.run_string().await?)216		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.217		let mut cmd = self218			.cmd_escalation(219				// Not used220				EscalationStrategy::Su,221				"which",222			)223			.await?;224		cmd.arg(command);225		cmd.run_string().await226	}227	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>228	where229		<D as FromStr>::Err: Display,230	{231		let text = self.read_file_text(path).await?;232		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: 
{e}"))233	}234	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {235		self.cmd_escalation(self.escalation_strategy().await?, cmd)236			.await237	}238	pub async fn cmd_escalation(239		&self,240		escalation: EscalationStrategy,241		cmd: impl AsRef<OsStr>,242	) -> Result<MyCommand> {243		if self.local {244			Ok(MyCommand::new(escalation, cmd))245		} else {246			let session = self.open_session().await?;247			Ok(MyCommand::new_on(escalation, cmd, session))248		}249	}250	pub async fn nix_cmd(&self) -> Result<MyCommand> {251		let mut nix = self.cmd("nix").await?;252		nix.args([253			"--extra-experimental-features",254			"nix-command",255			"--extra-experimental-features",256			"flakes",257		]);258		Ok(nix)259	}260261	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {262		ensure!(data.encrypted, "secret is not encrypted");263		let mut cmd = self.cmd("fleet-install-secrets").await?;264		cmd.arg("decrypt").eqarg("--secret", data.to_string());265		let encoded = cmd266			.sudo()267			.run_string()268			.await269			.context("failed to call remote host for decrypt")?;270		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;271		ensure!(!data.encrypted, "secret came out encrypted");272		Ok(data.data)273	}274	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {275		ensure!(data.encrypted, "secret is not encrypted");276		let mut cmd = self.cmd("fleet-install-secrets").await?;277		cmd.arg("reencrypt").eqarg("--secret", data.to_string());278		for target in targets {279			let key = self.config.key(&target).await?;280			cmd.eqarg("--targets", key);281		}282		let encoded = cmd283			.sudo()284			.run_string()285			.await286			.context("failed to call remote host for decrypt")?;287		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;288		ensure!(data.encrypted, "secret came out not encrypted");289		Ok(data)290	}291	/// Returns path for futureproofing, as path might change i.e on 
conversion to CA292	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {293		if self.local {294			// Path is located locally, thus already trusted.295			return Ok(path.to_owned());296		}297		let mut nix = MyCommand::new(298			// Not used299			EscalationStrategy::Su,300			"nix",301		);302		nix.arg("copy").arg("--substitute-on-destination");303304		match self.deploy_kind().await? {305			DeployKind::Fleet | DeployKind::UpgradeToFleet => {306				nix.comparg("--to", format!("ssh-ng://{}", self.name));307			}308			DeployKind::NixosInstall => {309				nix310					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon311					.arg("--no-check-sigs")312					.comparg(313						"--to",314						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),315					);316			}317		}318		nix.arg(path);319		nix.run_nix().await.context("nix copy")?;320		Ok(path.to_owned())321	}322	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {323		let mut cmd = self.cmd("systemctl").await?;324		cmd.arg("stop").arg(name);325		cmd.sudo().run().await326	}327	pub async fn systemctl_start(&self, name: &str) -> Result<()> {328		let mut cmd = self.cmd("systemctl").await?;329		cmd.arg("start").arg(name);330		cmd.sudo().run().await331	}332333	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {334		let mut cmd = self.cmd("rm").await?;335		cmd.arg("-f").arg(path);336		if sudo {337			cmd = cmd.sudo()338		}339		cmd.run().await340	}341}342impl ConfigHost {343	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,344	// assuming getting tags always returns the same value.345	pub async fn tags(&self) -> Result<Vec<String>> {346		if let Some(v) = self.groups.get() {347			return Ok(v.clone());348		}349		let Some(host_config) = &self.host_config else {350			return Ok(vec![]);351		};352		let tags: Vec<String> = 
nix_go_json!(host_config.tags);353354		let _ = self.groups.set(tags.clone());355356		Ok(tags)357	}358	pub async fn nixos_config(&self) -> Result<Value> {359		if let Some(v) = self.nixos_config.get() {360			return Ok(v.clone());361		}362		let Some(host_config) = &self.host_config else {363			bail!("local host has no nixos_config");364		};365		let nixos_config = nix_go!(host_config.nixos.config);366		assert_warn("nixos config evaluation", &nixos_config).await?;367368		let _ = self.nixos_config.set(nixos_config.clone());369370		Ok(nixos_config)371	}372373	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {374		let nixos = self.nixos_config().await?;375		let secrets = nix_go!(nixos.secrets);376		let mut out = Vec::new();377		for name in secrets.list_fields().await? {378			let secret = nix_go!(secrets[{ name }]);379			let is_shared: bool = nix_go_json!(secret.shared);380			if is_shared {381				continue;382			}383			out.push(name);384		}385		Ok(out)386	}387	pub async fn secret_field(&self, name: &str) -> Result<Value> {388		let nixos = self.nixos_config().await?;389		Ok(nix_go!(nixos.secrets[{ name }]))390	}391392	/// Packages for this host, resolved with nixpkgs overlays393	pub async fn pkgs(&self) -> Result<Value> {394		if let Some(value) = &self.pkgs_override {395			return Ok(value.clone());396		}397		let Some(host_config) = &self.host_config else {398			bail!("local host has no host_config");399		};400		// TODO: Should nixos.options be cached?401		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))402	}403}404405impl Config {406	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {407		let config = &self.config_field;408		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);409		Ok(tagged)410	}411	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {412		let mut out = BTreeSet::new();413		for owner in owners {414			if let Some(tag) = owner.strip_prefix('@') {415				let 
hosts = self.tagged_hostnames(tag).await?;416				out.extend(hosts);417			} else {418				out.insert(owner);419			}420		}421		Ok(out)422	}423	pub fn local_host(&self) -> ConfigHost {424		ConfigHost {425			config: self.clone(),426			name: "<virtual localhost>".to_owned(),427			host_config: None,428			nixos_config: OnceCell::new(),429			groups: {430				let cell = OnceCell::new();431				let _ = cell.set(vec![]);432				cell433			},434			pkgs_override: Some(self.default_pkgs.clone()),435436			local: true,437			session: OnceLock::new(),438			deploy_kind: OnceCell::new(),439		}440	}441442	pub async fn host(&self, name: &str) -> Result<ConfigHost> {443		let config = &self.config_field;444		let host_config = nix_go!(config.hosts[{ name }]);445446		Ok(ConfigHost {447			config: self.clone(),448			name: name.to_owned(),449			host_config: Some(host_config),450			nixos_config: OnceCell::new(),451			groups: OnceCell::new(),452			pkgs_override: None,453454			// TODO: Remove with connectivit refactor455			local: self.localhost == name,456			session: OnceLock::new(),457			deploy_kind: OnceCell::new(),458		})459	}460	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {461		let config = &self.config_field;462		let names = nix_go!(config.hosts).list_fields().await?;463		let mut out = vec![];464		for name in names {465			out.push(self.host(&name).await?);466		}467		Ok(out)468	}469	// TODO: Replace usages with .host().nixos_config470	pub async fn system_config(&self, host: &str) -> Result<Value> {471		let fleet_field = &self.config_field;472		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))473	}474475	/// Shared secrets configured in fleet.nix or in flake476	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {477		let config_field = &self.config_field;478		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)479	}480	/// Shared secrets configured in fleet.nix481	pub fn list_shared(&self) -> Vec<String> {482		let data = self.data();483		
data.shared_secrets.keys().cloned().collect()484	}485	pub fn has_shared(&self, name: &str) -> bool {486		let data = self.data();487		data.shared_secrets.contains_key(name)488	}489	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {490		let mut data = self.data_mut();491		data.shared_secrets.insert(name.to_owned(), shared);492	}493	pub fn remove_shared(&self, secret: &str) {494		let mut data = self.data_mut();495		data.shared_secrets.remove(secret);496	}497498	pub fn list_secrets(&self, host: &str) -> Vec<String> {499		let data = self.data();500		let Some(secrets) = data.host_secrets.get(host) else {501			return Vec::new();502		};503		secrets.keys().cloned().collect()504	}505506	pub fn has_secret(&self, host: &str, secret: &str) -> bool {507		let data = self.data();508		let Some(host_secrets) = data.host_secrets.get(host) else {509			return false;510		};511		host_secrets.contains_key(secret)512	}513	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {514		let mut data = self.data_mut();515		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();516		host_secrets.insert(secret, value);517	}518519	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {520		let data = self.data();521		let Some(host_secrets) = data.host_secrets.get(host) else {522			bail!("no secrets for machine {host}");523		};524		let Some(secret) = host_secrets.get(secret) else {525			bail!("machine {host} has no secret {secret}");526		};527		Ok(secret.clone())528	}529	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {530		let data = self.data();531		let Some(secret) = data.shared_secrets.get(secret) else {532			bail!("no shared secret {secret}");533		};534		Ok(secret.clone())535	}536	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {537		let config_field = &self.config_field;538		Ok(nix_go_json!(539			config_field.sharedSecrets[{ secret }].expectedOwners540		
))541	}542543	// TODO: Should this be something modifiable from other processes?544	// E.g terraform provider might want to update FleetData (e.g secrets),545	// and current implementation assumes only one process holds current fleet.nix546	// Given that it is no longer needs to be a file for nix evaluation,547	// maybe it can be a .nix file for persistence, but accessible only548	// thru some shared state controller? Might it be stored in terraform549	// state provider?550	pub fn data(&self) -> MutexGuard<FleetData> {551		self.data.lock().unwrap()552	}553	pub fn data_mut(&self) -> MutexGuard<FleetData> {554		self.data.lock().unwrap()555	}556	pub fn save(&self) -> Result<()> {557		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;558		let data = nixlike::serialize(&self.data() as &FleetData)?;559		tempfile.write_all(560			format!(561				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",562				data563			)564			.as_bytes(),565		)?;566		let mut fleet_data_path = self.directory.clone();567		fleet_data_path.push("fleet.nix");568		tempfile.persist(fleet_data_path)?;569		Ok(())570	}571}
after · crates/fleet-base/src/host.rs
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	/// Fleet project directory, containing fleet.nix file.27	pub directory: PathBuf,28	/// builtins.currentSystem29	pub local_system: String,30	pub data: Mutex<FleetData>,31	pub nix_args: Vec<OsString>,32	/// fleet_config.config33	pub config_field: Value,34	// TODO: Remove with connectivity refactor35	pub localhost: String,3637	/// import nixpkgs {system = local};38	pub default_pkgs: Value,39	/// inputs.nixpkgs40	pub nixpkgs: Value,4142	pub nix_session: NixSession,43}4445// TODO: Make field not pub46#[derive(Clone)]47pub struct Config(pub Arc<FleetConfigInternals>);4849impl Deref for Config {50	type Target = FleetConfigInternals;5152	fn deref(&self) -> &Self::Target {53		&self.054	}55}5657#[derive(Clone, Copy, Debug)]58pub enum EscalationStrategy {59	Sudo,60	Run0,61	Su,62}6364#[derive(Clone, PartialEq, Copy, Debug)]65pub enum DeployKind {66	/// NixOS => NixOS managed by fleet67	UpgradeToFleet,68	/// NixOS managed by fleet => NixOS managed by fleet69	Fleet,70	/// Remote host has /mnt, /mnt/boot mounted,71	/// generated config is added to fleet configuration.72	NixosInstall,73	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),74	/// generated config is added to fleet configuration,75	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.76	NixosLustrate,77}7879impl FromStr for DeployKind {80	type Err = anyhow::Error;81	fn from_str(s: 
&str) -> std::result::Result<Self, Self::Err> {82		match s {83			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),84			"fleet" => Ok(Self::Fleet),85			"nixos-install" => Ok(Self::NixosInstall),86			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),87		}88	}89}90pub struct ConfigHost {91	config: Config,92	pub name: String,93	groups: OnceCell<Vec<String>>,9495	deploy_kind: OnceCell<DeployKind>,9697	pub host_config: Option<Value>,98	pub nixos_config: OnceCell<Value>,99	pub pkgs_override: Option<Value>,100101	// TODO: Move command helpers away with connectivity refactor102	pub local: bool,103	pub session: OnceLock<Arc<openssh::Session>>,104}105// TODO: Move command helpers away with connectivity refactor106impl ConfigHost {107	pub fn set_deploy_kind(&self, kind: DeployKind) {108		self.deploy_kind109			.set(kind)110			.ok()111			.expect("deploy kind is already set");112	}113	pub async fn deploy_kind(&self) -> Result<DeployKind> {114		if let Some(kind) = self.deploy_kind.get() {115			return Ok(kind.clone());116		}117		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {118			Ok(v) => v,119			Err(e) => {120				bail!("failed to query remote system kind: {}", e);121			}122		};123		if !is_fleet_managed {124			bail!(indoc::indoc! {"125				host is not marked as managed by fleet126				if you're not trying to lustrate/install system from scratch,127				you should either128					1. manually create /etc/FLEET_HOST file on the target host,129					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet130					3. 
use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos131			"});132		}133		// TOCTOU is possible134		let _ = self.deploy_kind.set(DeployKind::Fleet);135		Ok(self136			.deploy_kind137			.get()138			.expect("deploy kind is just set")139			.clone())140	}141	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {142		// Prefer sudo, as run0 has some gotchas with polkit143		// and too many repeating prompts.144		if (self.find_in_path("sudo").await).is_ok() {145			return Ok(EscalationStrategy::Sudo);146		}147		if (self.find_in_path("run0").await).is_ok() {148			return Ok(EscalationStrategy::Run0);149		}150		Ok(EscalationStrategy::Su)151	}152	async fn open_session(&self) -> Result<Arc<openssh::Session>> {153		assert!(!self.local, "do not open ssh connection to local session");154		// FIXME: TOCTOU155		if let Some(session) = &self.session.get() {156			return Ok((*session).clone());157		};158		let session = SessionBuilder::default();159		let session = session160			.connect(&self.name)161			.await162			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;163		let session = Arc::new(session);164		self.session.set(session.clone()).expect("TOCTOU happened");165		Ok(session)166	}167	pub async fn mktemp_dir(&self) -> Result<String> {168		let mut cmd = self.cmd("mktemp").await?;169		cmd.arg("-d");170		let path = cmd.run_string().await?;171		Ok(path.trim_end().to_owned())172	}173	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {174		let mut cmd = self.cmd("sh").await?;175		cmd.arg("-c")176			.arg("test -e \"$1\" && echo true || echo false")177			.arg("_")178			.arg(path);179		Ok(cmd.run_value().await?)180	}181	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {182		let mut cmd = self.cmd("cat").await?;183		cmd.arg(path);184		cmd.run_bytes().await185	}186	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {187		let mut cmd = 
self.cmd("cat").await?;188		cmd.arg(path);189		cmd.run_string().await190	}191	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {192		let mut cmd = self.cmd("ls").await?;193		cmd.arg(path);194		let out = cmd.run_string().await?;195		let mut lines = out.split('\n');196		if let Some(last) = lines.next_back() {197			ensure!(last.is_empty(), "output of ls should end with newline");198		}199		Ok(lines.map(ToOwned::to_owned).collect())200	}201	#[allow(dead_code)]202	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {203		let text = self.read_file_text(path).await?;204		Ok(serde_json::from_str(&text)?)205	}206	pub async fn read_env(&self, env: &str) -> Result<String> {207		let mut cmd = self.cmd("printenv").await?;208		cmd.arg(env);209		cmd.run_string().await210	}211	pub async fn find_in_path(&self, command: &str) -> Result<String> {212		// // `which` is not a part of coreutils, and it might not exist on machine.213		// let path = self.read_env("PATH").await?;214		// // Assuming delimiter is :, we don't work with windows host, this check will be much215		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)216		// for ele in path.split(':') {217		// 	let test_path = format!("{ele}/{cmd}");218		// 	test -x etc219		// }220		// let mut cmd = self.cmd("printenv").await?;221		// cmd.arg(env);222		// Ok(cmd.run_string().await?)223		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.224		let mut cmd = self225			.cmd_escalation(226				// Not used227				EscalationStrategy::Su,228				"which",229			)230			.await?;231		cmd.arg(command);232		cmd.run_string().await233	}234	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>235	where236		<D as FromStr>::Err: Display,237	{238		let text = self.read_file_text(path).await?;239		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: 
{e}"))240	}241	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {242		self.cmd_escalation(self.escalation_strategy().await?, cmd)243			.await244	}245	pub async fn cmd_escalation(246		&self,247		escalation: EscalationStrategy,248		cmd: impl AsRef<OsStr>,249	) -> Result<MyCommand> {250		if self.local {251			Ok(MyCommand::new(escalation, cmd))252		} else {253			let session = self.open_session().await?;254			Ok(MyCommand::new_on(escalation, cmd, session))255		}256	}257	pub async fn nix_cmd(&self) -> Result<MyCommand> {258		let mut nix = self.cmd("nix").await?;259		nix.args([260			"--extra-experimental-features",261			"nix-command",262			"--extra-experimental-features",263			"flakes",264		]);265		Ok(nix)266	}267268	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {269		ensure!(data.encrypted, "secret is not encrypted");270		let mut cmd = self.cmd("fleet-install-secrets").await?;271		cmd.arg("decrypt").eqarg("--secret", data.to_string());272		let encoded = cmd273			.sudo()274			.run_string()275			.await276			.context("failed to call remote host for decrypt")?;277		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;278		ensure!(!data.encrypted, "secret came out encrypted");279		Ok(data.data)280	}281	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {282		ensure!(data.encrypted, "secret is not encrypted");283		let mut cmd = self.cmd("fleet-install-secrets").await?;284		cmd.arg("reencrypt").eqarg("--secret", data.to_string());285		for target in targets {286			let key = self.config.key(&target).await?;287			cmd.eqarg("--targets", key);288		}289		let encoded = cmd290			.sudo()291			.run_string()292			.await293			.context("failed to call remote host for decrypt")?;294		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;295		ensure!(data.encrypted, "secret came out not encrypted");296		Ok(data)297	}298	/// Returns path for futureproofing, as path might change i.e on 
conversion to CA299	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {300		if self.local {301			// Path is located locally, thus already trusted.302			return Ok(path.to_owned());303		}304		let mut nix = MyCommand::new(305			// Not used306			EscalationStrategy::Su,307			"nix",308		);309		nix.arg("copy").arg("--substitute-on-destination");310311		match self.deploy_kind().await? {312			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {313				nix.comparg("--to", format!("ssh-ng://{}", self.name));314			}315			DeployKind::NixosInstall => {316				nix317					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon318					.arg("--no-check-sigs")319					.comparg(320						"--to",321						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),322					);323			}324		}325		nix.arg(path);326		nix.run_nix().await.context("nix copy")?;327		Ok(path.to_owned())328	}329	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {330		let mut cmd = self.cmd("systemctl").await?;331		cmd.arg("stop").arg(name);332		cmd.sudo().run().await333	}334	pub async fn systemctl_start(&self, name: &str) -> Result<()> {335		let mut cmd = self.cmd("systemctl").await?;336		cmd.arg("start").arg(name);337		cmd.sudo().run().await338	}339340	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {341		let mut cmd = self.cmd("rm").await?;342		cmd.arg("-f").arg(path);343		if sudo {344			cmd = cmd.sudo()345		}346		cmd.run().await347	}348}349impl ConfigHost {350	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,351	// assuming getting tags always returns the same value.352	pub async fn tags(&self) -> Result<Vec<String>> {353		if let Some(v) = self.groups.get() {354			return Ok(v.clone());355		}356		let Some(host_config) = &self.host_config else {357			return Ok(vec![]);358		};359		let tags: Vec<String> 
= nix_go_json!(host_config.tags);360361		let _ = self.groups.set(tags.clone());362363		Ok(tags)364	}365	pub async fn nixos_config(&self) -> Result<Value> {366		if let Some(v) = self.nixos_config.get() {367			return Ok(v.clone());368		}369		let Some(host_config) = &self.host_config else {370			bail!("local host has no nixos_config");371		};372		let nixos_config = nix_go!(host_config.nixos.config);373		assert_warn("nixos config evaluation", &nixos_config).await?;374375		let _ = self.nixos_config.set(nixos_config.clone());376377		Ok(nixos_config)378	}379380	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {381		let nixos = self.nixos_config().await?;382		let secrets = nix_go!(nixos.secrets);383		let mut out = Vec::new();384		for name in secrets.list_fields().await? {385			let secret = nix_go!(secrets[{ name }]);386			let is_shared: bool = nix_go_json!(secret.shared);387			if is_shared {388				continue;389			}390			out.push(name);391		}392		Ok(out)393	}394	pub async fn secret_field(&self, name: &str) -> Result<Value> {395		let nixos = self.nixos_config().await?;396		Ok(nix_go!(nixos.secrets[{ name }]))397	}398399	/// Packages for this host, resolved with nixpkgs overlays400	pub async fn pkgs(&self) -> Result<Value> {401		if let Some(value) = &self.pkgs_override {402			return Ok(value.clone());403		}404		let Some(host_config) = &self.host_config else {405			bail!("local host has no host_config");406		};407		// TODO: Should nixos.options be cached?408		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))409	}410}411412impl Config {413	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {414		let config = &self.config_field;415		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);416		Ok(tagged)417	}418	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {419		let mut out = BTreeSet::new();420		for owner in owners {421			if let Some(tag) = owner.strip_prefix('@') {422				let 
hosts = self.tagged_hostnames(tag).await?;423				out.extend(hosts);424			} else {425				out.insert(owner);426			}427		}428		Ok(out)429	}430	pub fn local_host(&self) -> ConfigHost {431		ConfigHost {432			config: self.clone(),433			name: "<virtual localhost>".to_owned(),434			host_config: None,435			nixos_config: OnceCell::new(),436			groups: {437				let cell = OnceCell::new();438				let _ = cell.set(vec![]);439				cell440			},441			pkgs_override: Some(self.default_pkgs.clone()),442443			local: true,444			session: OnceLock::new(),445			deploy_kind: OnceCell::new(),446		}447	}448449	pub async fn host(&self, name: &str) -> Result<ConfigHost> {450		let config = &self.config_field;451		let host_config = nix_go!(config.hosts[{ name }]);452453		Ok(ConfigHost {454			config: self.clone(),455			name: name.to_owned(),456			host_config: Some(host_config),457			nixos_config: OnceCell::new(),458			groups: OnceCell::new(),459			pkgs_override: None,460461			// TODO: Remove with connectivit refactor462			local: self.localhost == name,463			session: OnceLock::new(),464			deploy_kind: OnceCell::new(),465		})466	}467	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {468		let config = &self.config_field;469		let names = nix_go!(config.hosts).list_fields().await?;470		let mut out = vec![];471		for name in names {472			out.push(self.host(&name).await?);473		}474		Ok(out)475	}476	// TODO: Replace usages with .host().nixos_config477	pub async fn system_config(&self, host: &str) -> Result<Value> {478		let fleet_field = &self.config_field;479		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))480	}481482	/// Shared secrets configured in fleet.nix or in flake483	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {484		let config_field = &self.config_field;485		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)486	}487	/// Shared secrets configured in fleet.nix488	pub fn list_shared(&self) -> Vec<String> {489		let data = self.data();490		
data.shared_secrets.keys().cloned().collect()491	}492	pub fn has_shared(&self, name: &str) -> bool {493		let data = self.data();494		data.shared_secrets.contains_key(name)495	}496	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {497		let mut data = self.data_mut();498		data.shared_secrets.insert(name.to_owned(), shared);499	}500	pub fn remove_shared(&self, secret: &str) {501		let mut data = self.data_mut();502		data.shared_secrets.remove(secret);503	}504505	pub fn list_secrets(&self, host: &str) -> Vec<String> {506		let data = self.data();507		let Some(secrets) = data.host_secrets.get(host) else {508			return Vec::new();509		};510		secrets.keys().cloned().collect()511	}512513	pub fn has_secret(&self, host: &str, secret: &str) -> bool {514		let data = self.data();515		let Some(host_secrets) = data.host_secrets.get(host) else {516			return false;517		};518		host_secrets.contains_key(secret)519	}520	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {521		let mut data = self.data_mut();522		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();523		host_secrets.insert(secret, value);524	}525526	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {527		let data = self.data();528		let Some(host_secrets) = data.host_secrets.get(host) else {529			bail!("no secrets for machine {host}");530		};531		let Some(secret) = host_secrets.get(secret) else {532			bail!("machine {host} has no secret {secret}");533		};534		Ok(secret.clone())535	}536	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {537		let data = self.data();538		let Some(secret) = data.shared_secrets.get(secret) else {539			bail!("no shared secret {secret}");540		};541		Ok(secret.clone())542	}543	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {544		let config_field = &self.config_field;545		Ok(nix_go_json!(546			config_field.sharedSecrets[{ secret }].expectedOwners547		
))548	}549550	// TODO: Should this be something modifiable from other processes?551	// E.g terraform provider might want to update FleetData (e.g secrets),552	// and current implementation assumes only one process holds current fleet.nix553	// Given that it is no longer needs to be a file for nix evaluation,554	// maybe it can be a .nix file for persistence, but accessible only555	// thru some shared state controller? Might it be stored in terraform556	// state provider?557	pub fn data(&self) -> MutexGuard<FleetData> {558		self.data.lock().unwrap()559	}560	pub fn data_mut(&self) -> MutexGuard<FleetData> {561		self.data.lock().unwrap()562	}563	pub fn save(&self) -> Result<()> {564		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;565		let data = nixlike::serialize(&self.data() as &FleetData)?;566		tempfile.write_all(567			format!(568				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",569				data570			)571			.as_bytes(),572		)?;573		let mut fleet_data_path = self.directory.clone();574		fleet_data_path.push("fleet.nix");575		tempfile.persist(fleet_data_path)?;576		Ok(())577	}578}
modifiedcrates/fleet-base/src/opts.rsdiffbeforeafterboth
--- a/crates/fleet-base/src/opts.rs
+++ b/crates/fleet-base/src/opts.rs
@@ -6,7 +6,7 @@
 	sync::{Arc, Mutex},
 };
 
-use anyhow::{Context, Result};
+use anyhow::{bail, Context, Result};
 use clap::Parser;
 use nix_eval::{nix_go, util::assert_warn, NixSessionPool, Value};
 use nom::{
@@ -182,7 +182,23 @@
 
 	// TODO: Config should be detached from opts.
 	pub async fn build(&self, nix_args: Vec<OsString>, assert: bool) -> Result<Config> {
-		let directory = current_dir()?;
+		let cwd = current_dir()?;
+		let mut directory = cwd.clone();
+		let mut fleet_data_path = directory.join("fleet.nix");
+		while !fleet_data_path.is_file() {
+			// fleet.nix
+			fleet_data_path.pop();
+			if !directory.pop() || !fleet_data_path.pop() {
+				bail!(
+					"fleet.nix not found at {} or any of the parent directories",
+					cwd.display()
+				);
+			}
+			fleet_data_path.push("fleet.nix");
+		}
+		let bytes =
+			std::fs::read_to_string(&fleet_data_path).context("reading fleet state (fleet.nix)")?;
+		let data: Mutex<FleetData> = nixlike::parse_str(&bytes)?;
 
 		let pool = NixSessionPool::new(
 			directory.as_os_str().to_owned(),
@@ -193,12 +209,6 @@
 		let nix_session = pool.get().await?;
 
 		let builtins_field = Value::binding(nix_session.clone(), "builtins").await?;
-
-		let mut fleet_data_path = directory.clone();
-		fleet_data_path.push("fleet.nix");
-		let bytes =
-			std::fs::read_to_string(fleet_data_path).context("reading fleet state (fleet.nix)")?;
-		let data: Mutex<FleetData> = nixlike::parse_str(&bytes)?;
 
 		let fleet_root = Value::binding(nix_session.clone(), "fleetConfigurations").await?;
 		let fleet_field = nix_go!(fleet_root.default({ data }));