git.delta.rocks / jrsonnet / refs/commits / a5650649afd2

diff · tree · log

source

crates/fleet-base/src/host.rs · 18.2 KiB · source · history
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	/// Fleet project directory, containing fleet.nix file.27	pub directory: PathBuf,28	/// builtins.currentSystem29	pub local_system: String,30	pub data: Mutex<FleetData>,31	pub nix_args: Vec<OsString>,32	/// fleet_config.config33	pub config_field: Value,34	// TODO: Remove with connectivity refactor35	pub localhost: String,3637	/// import nixpkgs {system = local};38	pub default_pkgs: Value,39	/// inputs.nixpkgs40	pub nixpkgs: Value,4142	pub nix_session: NixSession,43}4445// TODO: Make field not pub46#[derive(Clone)]47pub struct Config(pub Arc<FleetConfigInternals>);4849impl Deref for Config {50	type Target = FleetConfigInternals;5152	fn deref(&self) -> &Self::Target {53		&self.054	}55}5657#[derive(Clone, Copy, Debug)]58pub enum EscalationStrategy {59	Sudo,60	Run0,61	Su,62}6364#[derive(Clone, PartialEq, Copy, Debug)]65pub enum DeployKind {66	/// NixOS => NixOS managed by fleet67	UpgradeToFleet,68	/// NixOS managed by fleet => NixOS managed by fleet69	Fleet,70	/// Remote host has /mnt, /mnt/boot mounted,71	/// generated config is added to fleet configuration.72	NixosInstall,73	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),74	/// generated config is added to fleet configuration,75	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.76	NixosLustrate,77}7879impl FromStr for DeployKind {80	type Err = anyhow::Error;81	fn from_str(s: 
&str) -> std::result::Result<Self, Self::Err> {82		match s {83			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),84			"fleet" => Ok(Self::Fleet),85			"nixos-install" => Ok(Self::NixosInstall),86			"nixos-lustrate" => Ok(Self::NixosLustrate),87			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""),88		}89	}90}91pub struct ConfigHost {92	config: Config,93	pub name: String,94	groups: OnceCell<Vec<String>>,9596	deploy_kind: OnceCell<DeployKind>,9798	pub host_config: Option<Value>,99	pub nixos_config: OnceCell<Value>,100	pub nixos_unchecked_config: OnceCell<Value>,101	pub pkgs_override: Option<Value>,102103	// TODO: Move command helpers away with connectivity refactor104	pub local: bool,105	pub session: OnceLock<Arc<openssh::Session>>,106}107// TODO: Move command helpers away with connectivity refactor108impl ConfigHost {109	pub fn set_deploy_kind(&self, kind: DeployKind) {110		self.deploy_kind111			.set(kind)112			.ok()113			.expect("deploy kind is already set");114	}115	pub async fn deploy_kind(&self) -> Result<DeployKind> {116		if let Some(kind) = self.deploy_kind.get() {117			return Ok(kind.clone());118		}119		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {120			Ok(v) => v,121			Err(e) => {122				bail!("failed to query remote system kind: {}", e);123			}124		};125		if !is_fleet_managed {126			bail!(indoc::indoc! {"127				host is not marked as managed by fleet128				if you're not trying to lustrate/install system from scratch,129				you should either130					1. manually create /etc/FLEET_HOST file on the target host,131					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet132					3. 
use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos133			"});134		}135		// TOCTOU is possible136		let _ = self.deploy_kind.set(DeployKind::Fleet);137		Ok(self138			.deploy_kind139			.get()140			.expect("deploy kind is just set")141			.clone())142	}143	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {144		// Prefer sudo, as run0 has some gotchas with polkit145		// and too many repeating prompts.146		if (self.find_in_path("sudo").await).is_ok() {147			return Ok(EscalationStrategy::Sudo);148		}149		if (self.find_in_path("run0").await).is_ok() {150			return Ok(EscalationStrategy::Run0);151		}152		Ok(EscalationStrategy::Su)153	}154	async fn open_session(&self) -> Result<Arc<openssh::Session>> {155		assert!(!self.local, "do not open ssh connection to local session");156		// FIXME: TOCTOU157		if let Some(session) = &self.session.get() {158			return Ok((*session).clone());159		};160		let session = SessionBuilder::default();161		let session = session162			.connect(&self.name)163			.await164			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;165		let session = Arc::new(session);166		self.session.set(session.clone()).expect("TOCTOU happened");167		Ok(session)168	}169	pub async fn mktemp_dir(&self) -> Result<String> {170		let mut cmd = self.cmd("mktemp").await?;171		cmd.arg("-d");172		let path = cmd.run_string().await?;173		Ok(path.trim_end().to_owned())174	}175	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {176		let mut cmd = self.cmd("sh").await?;177		cmd.arg("-c")178			.arg("test -e \"$1\" && echo true || echo false")179			.arg("_")180			.arg(path);181		Ok(cmd.run_value().await?)182	}183	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {184		let mut cmd = self.cmd("cat").await?;185		cmd.arg(path);186		cmd.run_bytes().await187	}188	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {189		let mut cmd = 
self.cmd("cat").await?;190		cmd.arg(path);191		cmd.run_string().await192	}193	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {194		let mut cmd = self.cmd("ls").await?;195		cmd.arg(path);196		let out = cmd.run_string().await?;197		let mut lines = out.split('\n');198		if let Some(last) = lines.next_back() {199			ensure!(last.is_empty(), "output of ls should end with newline");200		}201		Ok(lines.map(ToOwned::to_owned).collect())202	}203	#[allow(dead_code)]204	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {205		let text = self.read_file_text(path).await?;206		Ok(serde_json::from_str(&text)?)207	}208	pub async fn read_env(&self, env: &str) -> Result<String> {209		let mut cmd = self.cmd("printenv").await?;210		cmd.arg(env);211		cmd.run_string().await212	}213	pub async fn find_in_path(&self, command: &str) -> Result<String> {214		// // `which` is not a part of coreutils, and it might not exist on machine.215		// let path = self.read_env("PATH").await?;216		// // Assuming delimiter is :, we don't work with windows host, this check will be much217		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)218		// for ele in path.split(':') {219		// 	let test_path = format!("{ele}/{cmd}");220		// 	test -x etc221		// }222		// let mut cmd = self.cmd("printenv").await?;223		// cmd.arg(env);224		// Ok(cmd.run_string().await?)225		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.226		let mut cmd = self227			.cmd_escalation(228				// Not used229				EscalationStrategy::Su,230				"which",231			)232			.await?;233		cmd.arg(command);234		cmd.run_string().await235	}236	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>237	where238		<D as FromStr>::Err: Display,239	{240		let text = self.read_file_text(path).await?;241		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: 
{e}"))242	}243	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {244		self.cmd_escalation(self.escalation_strategy().await?, cmd)245			.await246	}247	pub async fn cmd_escalation(248		&self,249		escalation: EscalationStrategy,250		cmd: impl AsRef<OsStr>,251	) -> Result<MyCommand> {252		if self.local {253			Ok(MyCommand::new(escalation, cmd))254		} else {255			let session = self.open_session().await?;256			Ok(MyCommand::new_on(escalation, cmd, session))257		}258	}259	pub async fn nix_cmd(&self) -> Result<MyCommand> {260		let mut nix = self.cmd("nix").await?;261		nix.args([262			"--extra-experimental-features",263			"nix-command",264			"--extra-experimental-features",265			"flakes",266		]);267		Ok(nix)268	}269270	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {271		ensure!(data.encrypted, "secret is not encrypted");272		let mut cmd = self.cmd("fleet-install-secrets").await?;273		cmd.arg("decrypt").eqarg("--secret", data.to_string());274		let encoded = cmd275			.sudo()276			.run_string()277			.await278			.context("failed to call remote host for decrypt")?;279		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;280		ensure!(!data.encrypted, "secret came out encrypted");281		Ok(data.data)282	}283	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {284		ensure!(data.encrypted, "secret is not encrypted");285		let mut cmd = self.cmd("fleet-install-secrets").await?;286		cmd.arg("reencrypt").eqarg("--secret", data.to_string());287		for target in targets {288			let key = self.config.key(&target).await?;289			cmd.eqarg("--targets", key);290		}291		let encoded = cmd292			.sudo()293			.run_string()294			.await295			.context("failed to call remote host for decrypt")?;296		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;297		ensure!(data.encrypted, "secret came out not encrypted");298		Ok(data)299	}300	/// Returns path for futureproofing, as path might change i.e on 
conversion to CA301	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {302		if self.local {303			// Path is located locally, thus already trusted.304			return Ok(path.to_owned());305		}306		let mut nix = MyCommand::new(307			// Not used308			EscalationStrategy::Su,309			"nix",310		);311		nix.arg("copy").arg("--substitute-on-destination");312313		match self.deploy_kind().await? {314			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {315				nix.comparg("--to", format!("ssh-ng://{}", self.name));316			}317			DeployKind::NixosInstall => {318				nix319					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon320					.arg("--no-check-sigs")321					.comparg(322						"--to",323						format!("ssh-ng://root@{}?remote-store=/mnt", self.name),324					);325			}326		}327		nix.arg(path);328		nix.run_nix().await.context("nix copy")?;329		Ok(path.to_owned())330	}331	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {332		let mut cmd = self.cmd("systemctl").await?;333		cmd.arg("stop").arg(name);334		cmd.sudo().run().await335	}336	pub async fn systemctl_start(&self, name: &str) -> Result<()> {337		let mut cmd = self.cmd("systemctl").await?;338		cmd.arg("start").arg(name);339		cmd.sudo().run().await340	}341342	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {343		let mut cmd = self.cmd("rm").await?;344		cmd.arg("-f").arg(path);345		if sudo {346			cmd = cmd.sudo()347		}348		cmd.run().await349	}350}351impl ConfigHost {352	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,353	// assuming getting tags always returns the same value.354	pub async fn tags(&self) -> Result<Vec<String>> {355		if let Some(v) = self.groups.get() {356			return Ok(v.clone());357		}358		let Some(host_config) = &self.host_config else {359			return Ok(vec![]);360		};361		let tags: Vec<String> = 
nix_go_json!(host_config.tags);362363		let _ = self.groups.set(tags.clone());364365		Ok(tags)366	}367	pub async fn nixos_config(&self) -> Result<Value> {368		if let Some(v) = self.nixos_config.get() {369			return Ok(v.clone());370		}371		let Some(host_config) = &self.host_config else {372			bail!("local host has no nixos_config");373		};374		let nixos_config = nix_go!(host_config.nixos.config);375		assert_warn("nixos config evaluation", &nixos_config).await?;376377		let _ = self.nixos_config.set(nixos_config.clone());378379		Ok(nixos_config)380	}381	pub async fn nixos_unchecked_config(&self) -> Result<Value> {382		if let Some(v) = self.nixos_unchecked_config.get() {383			return Ok(v.clone());384		}385		let Some(host_config) = &self.host_config else {386			bail!("local host has no nixos_config");387		};388		let nixos_config = nix_go!(host_config.nixos_unchecked.config);389390		let _ = self.nixos_unchecked_config.set(nixos_config.clone());391392		Ok(nixos_config)393	}394395	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {396		let nixos = self.nixos_unchecked_config().await?;397		let secrets = nix_go!(nixos.secrets);398		let mut out = Vec::new();399		for name in secrets.list_fields().await? 
{400			let secret = nix_go!(secrets[{ name }]);401			let is_shared: bool = nix_go_json!(secret.shared);402			if is_shared {403				continue;404			}405			out.push(name);406		}407		Ok(out)408	}409	pub async fn secret_field(&self, name: &str) -> Result<Value> {410		let nixos = self.nixos_unchecked_config().await?;411		Ok(nix_go!(nixos.secrets[{ name }]))412	}413414	/// Packages for this host, resolved with nixpkgs overlays415	pub async fn pkgs(&self) -> Result<Value> {416		if let Some(value) = &self.pkgs_override {417			return Ok(value.clone());418		}419		let Some(host_config) = &self.host_config else {420			bail!("local host has no host_config");421		};422		// TODO: Should nixos.options be cached?423		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))424	}425}426427impl Config {428	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {429		let config = &self.config_field;430		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);431		Ok(tagged)432	}433	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {434		let mut out = BTreeSet::new();435		for owner in owners {436			if let Some(tag) = owner.strip_prefix('@') {437				let hosts = self.tagged_hostnames(tag).await?;438				out.extend(hosts);439			} else {440				out.insert(owner);441			}442		}443		Ok(out)444	}445	pub fn local_host(&self) -> ConfigHost {446		ConfigHost {447			config: self.clone(),448			name: "<virtual localhost>".to_owned(),449			host_config: None,450			nixos_config: OnceCell::new(),451			nixos_unchecked_config: OnceCell::new(),452			groups: {453				let cell = OnceCell::new();454				let _ = cell.set(vec![]);455				cell456			},457			pkgs_override: Some(self.default_pkgs.clone()),458459			local: true,460			session: OnceLock::new(),461			deploy_kind: OnceCell::new(),462		}463	}464465	pub async fn host(&self, name: &str) -> Result<ConfigHost> {466		let config = &self.config_field;467		let host_config = nix_go!(config.hosts[{ name 
}]);468469		Ok(ConfigHost {470			config: self.clone(),471			name: name.to_owned(),472			host_config: Some(host_config),473			nixos_config: OnceCell::new(),474			nixos_unchecked_config: OnceCell::new(),475			groups: OnceCell::new(),476			pkgs_override: None,477478			// TODO: Remove with connectivit refactor479			local: self.localhost == name,480			session: OnceLock::new(),481			deploy_kind: OnceCell::new(),482		})483	}484	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {485		let config = &self.config_field;486		let names = nix_go!(config.hosts).list_fields().await?;487		let mut out = vec![];488		for name in names {489			out.push(self.host(&name).await?);490		}491		Ok(out)492	}493	// TODO: Replace usages with .host().nixos_config494	pub async fn system_config(&self, host: &str) -> Result<Value> {495		let fleet_field = &self.config_field;496		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))497	}498499	/// Shared secrets configured in fleet.nix or in flake500	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {501		let config_field = &self.config_field;502		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)503	}504	/// Shared secrets configured in fleet.nix505	pub fn list_shared(&self) -> Vec<String> {506		let data = self.data();507		data.shared_secrets.keys().cloned().collect()508	}509	pub fn has_shared(&self, name: &str) -> bool {510		let data = self.data();511		data.shared_secrets.contains_key(name)512	}513	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {514		let mut data = self.data_mut();515		data.shared_secrets.insert(name.to_owned(), shared);516	}517	pub fn remove_shared(&self, secret: &str) {518		let mut data = self.data_mut();519		data.shared_secrets.remove(secret);520	}521522	pub fn list_secrets(&self, host: &str) -> Vec<String> {523		let data = self.data();524		let Some(secrets) = data.host_secrets.get(host) else {525			return Vec::new();526		};527		secrets.keys().cloned().collect()528	}529530	
pub fn has_secret(&self, host: &str, secret: &str) -> bool {531		let data = self.data();532		let Some(host_secrets) = data.host_secrets.get(host) else {533			return false;534		};535		host_secrets.contains_key(secret)536	}537	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {538		let mut data = self.data_mut();539		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();540		host_secrets.insert(secret, value);541	}542543	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {544		let data = self.data();545		let Some(host_secrets) = data.host_secrets.get(host) else {546			bail!("no secrets for machine {host}");547		};548		let Some(secret) = host_secrets.get(secret) else {549			bail!("machine {host} has no secret {secret}");550		};551		Ok(secret.clone())552	}553	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {554		let data = self.data();555		let Some(secret) = data.shared_secrets.get(secret) else {556			bail!("no shared secret {secret}");557		};558		Ok(secret.clone())559	}560	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {561		let config_field = &self.config_field;562		Ok(nix_go_json!(563			config_field.sharedSecrets[{ secret }].expectedOwners564		))565	}566567	// TODO: Should this be something modifiable from other processes?568	// E.g terraform provider might want to update FleetData (e.g secrets),569	// and current implementation assumes only one process holds current fleet.nix570	// Given that it is no longer needs to be a file for nix evaluation,571	// maybe it can be a .nix file for persistence, but accessible only572	// thru some shared state controller? 
Might it be stored in terraform573	// state provider?574	pub fn data(&self) -> MutexGuard<FleetData> {575		self.data.lock().unwrap()576	}577	pub fn data_mut(&self) -> MutexGuard<FleetData> {578		self.data.lock().unwrap()579	}580	pub fn save(&self) -> Result<()> {581		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;582		let data = nixlike::serialize(&self.data() as &FleetData)?;583		tempfile.write_all(584			format!(585				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",586				data587			)588			.as_bytes(),589		)?;590		let mut fleet_data_path = self.directory.clone();591		fleet_data_path.push("fleet.nix");592		tempfile.persist(fleet_data_path)?;593		Ok(())594	}595}