git.delta.rocks / jrsonnet / refs/commits / 33e3a6cc33fd

difftreelog

source

crates/fleet-base/src/host.rs17.6 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	/// Fleet project directory, containing fleet.nix file.27	pub directory: PathBuf,28	/// builtins.currentSystem29	pub local_system: String,30	pub data: Mutex<FleetData>,31	pub nix_args: Vec<OsString>,32	/// fleet_config.config33	pub config_field: Value,34	// TODO: Remove with connectivity refactor35	pub localhost: String,3637	/// import nixpkgs {system = local};38	pub default_pkgs: Value,39	/// inputs.nixpkgs40	pub nixpkgs: Value,4142	pub nix_session: NixSession,43}4445// TODO: Make field not pub46#[derive(Clone)]47pub struct Config(pub Arc<FleetConfigInternals>);4849impl Deref for Config {50	type Target = FleetConfigInternals;5152	fn deref(&self) -> &Self::Target {53		&self.054	}55}5657#[derive(Clone, Copy, Debug)]58pub enum EscalationStrategy {59	Sudo,60	Run0,61	Su,62}6364#[derive(Clone, PartialEq, Copy, Debug)]65pub enum DeployKind {66	/// NixOS => NixOS managed by fleet67	UpgradeToFleet,68	/// NixOS managed by fleet => NixOS managed by fleet69	Fleet,70	/// Remote host has /mnt, /mnt/boot mounted,71	/// generated config is added to fleet configuration.72	NixosInstall,73	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),74	/// generated config is added to fleet configuration,75	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.76	NixosLustrate,77}7879impl FromStr for DeployKind {80	type Err = anyhow::Error;81	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {82		match s {83			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),84			"fleet" => Ok(Self::Fleet),85			"nixos-install" => Ok(Self::NixosInstall),86			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\""),87		}88	}89}90pub struct ConfigHost {91	config: Config,92	pub name: String,93	groups: OnceCell<Vec<String>>,9495	deploy_kind: OnceCell<DeployKind>,9697	pub host_config: Option<Value>,98	pub nixos_config: OnceCell<Value>,99	pub pkgs_override: Option<Value>,100101	// TODO: Move command helpers away with connectivity refactor102	pub local: bool,103	pub session: OnceLock<Arc<openssh::Session>>,104}105// TODO: Move command helpers away with connectivity refactor106impl ConfigHost {107	pub fn set_deploy_kind(&self, kind: DeployKind) {108		self.deploy_kind109			.set(kind)110			.ok()111			.expect("deploy kind is already set");112	}113	pub async fn deploy_kind(&self) -> Result<DeployKind> {114		if let Some(kind) = self.deploy_kind.get() {115			return Ok(kind.clone());116		}117		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {118			Ok(v) => v,119			Err(e) => {120				bail!("failed to query remote system kind: {}", e);121			}122		};123		if !is_fleet_managed {124			bail!(indoc::indoc! {"125				host is not marked as managed by fleet126				if you're not trying to lustrate/install system from scratch,127				you should either128					1. manually create /etc/FLEET_HOST file on the target host,129					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet130					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos131			"});132		}133		// TOCTOU is possible134		let _ = self.deploy_kind.set(DeployKind::Fleet);135		Ok(self136			.deploy_kind137			.get()138			.expect("deploy kind is just set")139			.clone())140	}141	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {142		// Prefer sudo, as run0 has some gotchas with polkit143		// and too many repeating prompts.144		if (self.find_in_path("sudo").await).is_ok() {145			return Ok(EscalationStrategy::Sudo);146		}147		if (self.find_in_path("run0").await).is_ok() {148			return Ok(EscalationStrategy::Run0);149		}150		Ok(EscalationStrategy::Su)151	}152	async fn open_session(&self) -> Result<Arc<openssh::Session>> {153		assert!(!self.local, "do not open ssh connection to local session");154		// FIXME: TOCTOU155		if let Some(session) = &self.session.get() {156			return Ok((*session).clone());157		};158		let session = SessionBuilder::default();159		let session = session160			.connect(&self.name)161			.await162			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;163		let session = Arc::new(session);164		self.session.set(session.clone()).expect("TOCTOU happened");165		Ok(session)166	}167	pub async fn mktemp_dir(&self) -> Result<String> {168		let mut cmd = self.cmd("mktemp").await?;169		cmd.arg("-d");170		let path = cmd.run_string().await?;171		Ok(path.trim_end().to_owned())172	}173	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {174		let mut cmd = self.cmd("sh").await?;175		cmd.arg("-c")176			.arg("test -e \"$1\" && echo true || echo false")177			.arg("_")178			.arg(path);179		Ok(cmd.run_value().await?)180	}181	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {182		let mut cmd = self.cmd("cat").await?;183		cmd.arg(path);184		cmd.run_bytes().await185	}186	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {187		let mut cmd = self.cmd("cat").await?;188		cmd.arg(path);189		cmd.run_string().await190	}191	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {192		let mut cmd = self.cmd("ls").await?;193		cmd.arg(path);194		let out = cmd.run_string().await?;195		let mut lines = out.split('\n');196		if let Some(last) = lines.next_back() {197			ensure!(last.is_empty(), "output of ls should end with newline");198		}199		Ok(lines.map(ToOwned::to_owned).collect())200	}201	#[allow(dead_code)]202	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {203		let text = self.read_file_text(path).await?;204		Ok(serde_json::from_str(&text)?)205	}206	pub async fn read_env(&self, env: &str) -> Result<String> {207		let mut cmd = self.cmd("printenv").await?;208		cmd.arg(env);209		cmd.run_string().await210	}211	pub async fn find_in_path(&self, command: &str) -> Result<String> {212		// // `which` is not a part of coreutils, and it might not exist on machine.213		// let path = self.read_env("PATH").await?;214		// // Assuming delimiter is :, we don't work with windows host, this check will be much215		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)216		// for ele in path.split(':') {217		// 	let test_path = format!("{ele}/{cmd}");218		// 	test -x etc219		// }220		// let mut cmd = self.cmd("printenv").await?;221		// cmd.arg(env);222		// Ok(cmd.run_string().await?)223		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.224		let mut cmd = self225			.cmd_escalation(226				// Not used227				EscalationStrategy::Su,228				"which",229			)230			.await?;231		cmd.arg(command);232		cmd.run_string().await233	}234	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>235	where236		<D as FromStr>::Err: Display,237	{238		let text = self.read_file_text(path).await?;239		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))240	}241	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {242		self.cmd_escalation(self.escalation_strategy().await?, cmd)243			.await244	}245	pub async fn cmd_escalation(246		&self,247		escalation: EscalationStrategy,248		cmd: impl AsRef<OsStr>,249	) -> Result<MyCommand> {250		if self.local {251			Ok(MyCommand::new(escalation, cmd))252		} else {253			let session = self.open_session().await?;254			Ok(MyCommand::new_on(escalation, cmd, session))255		}256	}257	pub async fn nix_cmd(&self) -> Result<MyCommand> {258		let mut nix = self.cmd("nix").await?;259		nix.args([260			"--extra-experimental-features",261			"nix-command",262			"--extra-experimental-features",263			"flakes",264		]);265		Ok(nix)266	}267268	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {269		ensure!(data.encrypted, "secret is not encrypted");270		let mut cmd = self.cmd("fleet-install-secrets").await?;271		cmd.arg("decrypt").eqarg("--secret", data.to_string());272		let encoded = cmd273			.sudo()274			.run_string()275			.await276			.context("failed to call remote host for decrypt")?;277		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;278		ensure!(!data.encrypted, "secret came out encrypted");279		Ok(data.data)280	}281	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {282		ensure!(data.encrypted, "secret is not encrypted");283		let mut cmd = self.cmd("fleet-install-secrets").await?;284		cmd.arg("reencrypt").eqarg("--secret", data.to_string());285		for target in targets {286			let key = self.config.key(&target).await?;287			cmd.eqarg("--targets", key);288		}289		let encoded = cmd290			.sudo()291			.run_string()292			.await293			.context("failed to call remote host for decrypt")?;294		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;295		ensure!(data.encrypted, "secret came out not encrypted");296		Ok(data)297	}298	/// Returns path for futureproofing, as path might change i.e on conversion to CA299	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {300		if self.local {301			// Path is located locally, thus already trusted.302			return Ok(path.to_owned());303		}304		let mut nix = MyCommand::new(305			// Not used306			EscalationStrategy::Su,307			"nix",308		);309		nix.arg("copy").arg("--substitute-on-destination");310311		match self.deploy_kind().await? {312			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {313				nix.comparg("--to", format!("ssh-ng://{}", self.name));314			}315			DeployKind::NixosInstall => {316				nix317					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon318					.arg("--no-check-sigs")319					.comparg(320						"--to",321						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),322					);323			}324		}325		nix.arg(path);326		nix.run_nix().await.context("nix copy")?;327		Ok(path.to_owned())328	}329	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {330		let mut cmd = self.cmd("systemctl").await?;331		cmd.arg("stop").arg(name);332		cmd.sudo().run().await333	}334	pub async fn systemctl_start(&self, name: &str) -> Result<()> {335		let mut cmd = self.cmd("systemctl").await?;336		cmd.arg("start").arg(name);337		cmd.sudo().run().await338	}339340	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {341		let mut cmd = self.cmd("rm").await?;342		cmd.arg("-f").arg(path);343		if sudo {344			cmd = cmd.sudo()345		}346		cmd.run().await347	}348}349impl ConfigHost {350	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,351	// assuming getting tags always returns the same value.352	pub async fn tags(&self) -> Result<Vec<String>> {353		if let Some(v) = self.groups.get() {354			return Ok(v.clone());355		}356		let Some(host_config) = &self.host_config else {357			return Ok(vec![]);358		};359		let tags: Vec<String> = nix_go_json!(host_config.tags);360361		let _ = self.groups.set(tags.clone());362363		Ok(tags)364	}365	pub async fn nixos_config(&self) -> Result<Value> {366		if let Some(v) = self.nixos_config.get() {367			return Ok(v.clone());368		}369		let Some(host_config) = &self.host_config else {370			bail!("local host has no nixos_config");371		};372		let nixos_config = nix_go!(host_config.nixos.config);373		assert_warn("nixos config evaluation", &nixos_config).await?;374375		let _ = self.nixos_config.set(nixos_config.clone());376377		Ok(nixos_config)378	}379380	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {381		let nixos = self.nixos_config().await?;382		let secrets = nix_go!(nixos.secrets);383		let mut out = Vec::new();384		for name in secrets.list_fields().await? {385			let secret = nix_go!(secrets[{ name }]);386			let is_shared: bool = nix_go_json!(secret.shared);387			if is_shared {388				continue;389			}390			out.push(name);391		}392		Ok(out)393	}394	pub async fn secret_field(&self, name: &str) -> Result<Value> {395		let nixos = self.nixos_config().await?;396		Ok(nix_go!(nixos.secrets[{ name }]))397	}398399	/// Packages for this host, resolved with nixpkgs overlays400	pub async fn pkgs(&self) -> Result<Value> {401		if let Some(value) = &self.pkgs_override {402			return Ok(value.clone());403		}404		let Some(host_config) = &self.host_config else {405			bail!("local host has no host_config");406		};407		// TODO: Should nixos.options be cached?408		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))409	}410}411412impl Config {413	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {414		let config = &self.config_field;415		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);416		Ok(tagged)417	}418	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {419		let mut out = BTreeSet::new();420		for owner in owners {421			if let Some(tag) = owner.strip_prefix('@') {422				let hosts = self.tagged_hostnames(tag).await?;423				out.extend(hosts);424			} else {425				out.insert(owner);426			}427		}428		Ok(out)429	}430	pub fn local_host(&self) -> ConfigHost {431		ConfigHost {432			config: self.clone(),433			name: "<virtual localhost>".to_owned(),434			host_config: None,435			nixos_config: OnceCell::new(),436			groups: {437				let cell = OnceCell::new();438				let _ = cell.set(vec![]);439				cell440			},441			pkgs_override: Some(self.default_pkgs.clone()),442443			local: true,444			session: OnceLock::new(),445			deploy_kind: OnceCell::new(),446		}447	}448449	pub async fn host(&self, name: &str) -> Result<ConfigHost> {450		let config = &self.config_field;451		let host_config = nix_go!(config.hosts[{ name }]);452453		Ok(ConfigHost {454			config: self.clone(),455			name: name.to_owned(),456			host_config: Some(host_config),457			nixos_config: OnceCell::new(),458			groups: OnceCell::new(),459			pkgs_override: None,460461			// TODO: Remove with connectivit refactor462			local: self.localhost == name,463			session: OnceLock::new(),464			deploy_kind: OnceCell::new(),465		})466	}467	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {468		let config = &self.config_field;469		let names = nix_go!(config.hosts).list_fields().await?;470		let mut out = vec![];471		for name in names {472			out.push(self.host(&name).await?);473		}474		Ok(out)475	}476	// TODO: Replace usages with .host().nixos_config477	pub async fn system_config(&self, host: &str) -> Result<Value> {478		let fleet_field = &self.config_field;479		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))480	}481482	/// Shared secrets configured in fleet.nix or in flake483	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {484		let config_field = &self.config_field;485		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)486	}487	/// Shared secrets configured in fleet.nix488	pub fn list_shared(&self) -> Vec<String> {489		let data = self.data();490		data.shared_secrets.keys().cloned().collect()491	}492	pub fn has_shared(&self, name: &str) -> bool {493		let data = self.data();494		data.shared_secrets.contains_key(name)495	}496	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {497		let mut data = self.data_mut();498		data.shared_secrets.insert(name.to_owned(), shared);499	}500	pub fn remove_shared(&self, secret: &str) {501		let mut data = self.data_mut();502		data.shared_secrets.remove(secret);503	}504505	pub fn list_secrets(&self, host: &str) -> Vec<String> {506		let data = self.data();507		let Some(secrets) = data.host_secrets.get(host) else {508			return Vec::new();509		};510		secrets.keys().cloned().collect()511	}512513	pub fn has_secret(&self, host: &str, secret: &str) -> bool {514		let data = self.data();515		let Some(host_secrets) = data.host_secrets.get(host) else {516			return false;517		};518		host_secrets.contains_key(secret)519	}520	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {521		let mut data = self.data_mut();522		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();523		host_secrets.insert(secret, value);524	}525526	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {527		let data = self.data();528		let Some(host_secrets) = data.host_secrets.get(host) else {529			bail!("no secrets for machine {host}");530		};531		let Some(secret) = host_secrets.get(secret) else {532			bail!("machine {host} has no secret {secret}");533		};534		Ok(secret.clone())535	}536	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {537		let data = self.data();538		let Some(secret) = data.shared_secrets.get(secret) else {539			bail!("no shared secret {secret}");540		};541		Ok(secret.clone())542	}543	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {544		let config_field = &self.config_field;545		Ok(nix_go_json!(546			config_field.sharedSecrets[{ secret }].expectedOwners547		))548	}549550	// TODO: Should this be something modifiable from other processes?551	// E.g terraform provider might want to update FleetData (e.g secrets),552	// and current implementation assumes only one process holds current fleet.nix553	// Given that it is no longer needs to be a file for nix evaluation,554	// maybe it can be a .nix file for persistence, but accessible only555	// thru some shared state controller? Might it be stored in terraform556	// state provider?557	pub fn data(&self) -> MutexGuard<FleetData> {558		self.data.lock().unwrap()559	}560	pub fn data_mut(&self) -> MutexGuard<FleetData> {561		self.data.lock().unwrap()562	}563	pub fn save(&self) -> Result<()> {564		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;565		let data = nixlike::serialize(&self.data() as &FleetData)?;566		tempfile.write_all(567			format!(568				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",569				data570			)571			.as_bytes(),572		)?;573		let mut fleet_data_path = self.directory.clone();574		fleet_data_path.push("fleet.nix");575		tempfile.persist(fleet_data_path)?;576		Ok(())577	}578}