git.delta.rocks / jrsonnet / refs/commits / 16589811b57c

difftreelog

source

crates/fleet-base/src/host.rs17.7 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tempfile::NamedTempFile;1920use crate::{21	command::MyCommand,22	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},23};2425pub struct FleetConfigInternals {26	/// Fleet project directory, containing fleet.nix file.27	pub directory: PathBuf,28	/// builtins.currentSystem29	pub local_system: String,30	pub data: Mutex<FleetData>,31	pub nix_args: Vec<OsString>,32	/// fleet_config.config33	pub config_field: Value,34	// TODO: Remove with connectivity refactor35	pub localhost: String,3637	/// import nixpkgs {system = local};38	pub default_pkgs: Value,39	/// inputs.nixpkgs40	pub nixpkgs: Value,4142	pub nix_session: NixSession,43}4445// TODO: Make field not pub46#[derive(Clone)]47pub struct Config(pub Arc<FleetConfigInternals>);4849impl Deref for Config {50	type Target = FleetConfigInternals;5152	fn deref(&self) -> &Self::Target {53		&self.054	}55}5657#[derive(Clone, Copy, Debug)]58pub enum EscalationStrategy {59	Sudo,60	Run0,61	Su,62}6364#[derive(Clone, PartialEq, Copy, Debug)]65pub enum DeployKind {66	/// NixOS => NixOS managed by fleet67	UpgradeToFleet,68	/// NixOS managed by fleet => NixOS managed by fleet69	Fleet,70	/// Remote host has /mnt, /mnt/boot mounted,71	/// generated config is added to fleet configuration.72	NixosInstall,73	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),74	/// generated config is added to fleet configuration,75	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.76	NixosLustrate,77}7879impl FromStr for DeployKind {80	type Err = anyhow::Error;81	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {82		match s {83			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),84			"fleet" => Ok(Self::Fleet),85			"nixos-install" => Ok(Self::NixosInstall),86			"nixos-lustrate" => Ok(Self::NixosLustrate),87			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""),88		}89	}90}91pub struct ConfigHost {92	config: Config,93	pub name: String,94	groups: OnceCell<Vec<String>>,9596	deploy_kind: OnceCell<DeployKind>,9798	pub host_config: Option<Value>,99	pub nixos_config: OnceCell<Value>,100	pub pkgs_override: Option<Value>,101102	// TODO: Move command helpers away with connectivity refactor103	pub local: bool,104	pub session: OnceLock<Arc<openssh::Session>>,105}106// TODO: Move command helpers away with connectivity refactor107impl ConfigHost {108	pub fn set_deploy_kind(&self, kind: DeployKind) {109		self.deploy_kind110			.set(kind)111			.ok()112			.expect("deploy kind is already set");113	}114	pub async fn deploy_kind(&self) -> Result<DeployKind> {115		if let Some(kind) = self.deploy_kind.get() {116			return Ok(kind.clone());117		}118		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {119			Ok(v) => v,120			Err(e) => {121				bail!("failed to query remote system kind: {}", e);122			}123		};124		if !is_fleet_managed {125			bail!(indoc::indoc! {"126				host is not marked as managed by fleet127				if you're not trying to lustrate/install system from scratch,128				you should either129					1. manually create /etc/FLEET_HOST file on the target host,130					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet131					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos132			"});133		}134		// TOCTOU is possible135		let _ = self.deploy_kind.set(DeployKind::Fleet);136		Ok(self137			.deploy_kind138			.get()139			.expect("deploy kind is just set")140			.clone())141	}142	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {143		// Prefer sudo, as run0 has some gotchas with polkit144		// and too many repeating prompts.145		if (self.find_in_path("sudo").await).is_ok() {146			return Ok(EscalationStrategy::Sudo);147		}148		if (self.find_in_path("run0").await).is_ok() {149			return Ok(EscalationStrategy::Run0);150		}151		Ok(EscalationStrategy::Su)152	}153	async fn open_session(&self) -> Result<Arc<openssh::Session>> {154		assert!(!self.local, "do not open ssh connection to local session");155		// FIXME: TOCTOU156		if let Some(session) = &self.session.get() {157			return Ok((*session).clone());158		};159		let session = SessionBuilder::default();160		let session = session161			.connect(&self.name)162			.await163			.map_err(|e| anyhow!("ssh error while connecting to {}: {e}", self.name))?;164		let session = Arc::new(session);165		self.session.set(session.clone()).expect("TOCTOU happened");166		Ok(session)167	}168	pub async fn mktemp_dir(&self) -> Result<String> {169		let mut cmd = self.cmd("mktemp").await?;170		cmd.arg("-d");171		let path = cmd.run_string().await?;172		Ok(path.trim_end().to_owned())173	}174	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {175		let mut cmd = self.cmd("sh").await?;176		cmd.arg("-c")177			.arg("test -e \"$1\" && echo true || echo false")178			.arg("_")179			.arg(path);180		Ok(cmd.run_value().await?)181	}182	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {183		let mut cmd = self.cmd("cat").await?;184		cmd.arg(path);185		cmd.run_bytes().await186	}187	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {188		let mut cmd = self.cmd("cat").await?;189		cmd.arg(path);190		cmd.run_string().await191	}192	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {193		let mut cmd = self.cmd("ls").await?;194		cmd.arg(path);195		let out = cmd.run_string().await?;196		let mut lines = out.split('\n');197		if let Some(last) = lines.next_back() {198			ensure!(last.is_empty(), "output of ls should end with newline");199		}200		Ok(lines.map(ToOwned::to_owned).collect())201	}202	#[allow(dead_code)]203	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {204		let text = self.read_file_text(path).await?;205		Ok(serde_json::from_str(&text)?)206	}207	pub async fn read_env(&self, env: &str) -> Result<String> {208		let mut cmd = self.cmd("printenv").await?;209		cmd.arg(env);210		cmd.run_string().await211	}212	pub async fn find_in_path(&self, command: &str) -> Result<String> {213		// // `which` is not a part of coreutils, and it might not exist on machine.214		// let path = self.read_env("PATH").await?;215		// // Assuming delimiter is :, we don't work with windows host, this check will be much216		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)217		// for ele in path.split(':') {218		// 	let test_path = format!("{ele}/{cmd}");219		// 	test -x etc220		// }221		// let mut cmd = self.cmd("printenv").await?;222		// cmd.arg(env);223		// Ok(cmd.run_string().await?)224		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.225		let mut cmd = self226			.cmd_escalation(227				// Not used228				EscalationStrategy::Su,229				"which",230			)231			.await?;232		cmd.arg(command);233		cmd.run_string().await234	}235	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>236	where237		<D as FromStr>::Err: Display,238	{239		let text = self.read_file_text(path).await?;240		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))241	}242	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {243		self.cmd_escalation(self.escalation_strategy().await?, cmd)244			.await245	}246	pub async fn cmd_escalation(247		&self,248		escalation: EscalationStrategy,249		cmd: impl AsRef<OsStr>,250	) -> Result<MyCommand> {251		if self.local {252			Ok(MyCommand::new(escalation, cmd))253		} else {254			let session = self.open_session().await?;255			Ok(MyCommand::new_on(escalation, cmd, session))256		}257	}258	pub async fn nix_cmd(&self) -> Result<MyCommand> {259		let mut nix = self.cmd("nix").await?;260		nix.args([261			"--extra-experimental-features",262			"nix-command",263			"--extra-experimental-features",264			"flakes",265		]);266		Ok(nix)267	}268269	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {270		ensure!(data.encrypted, "secret is not encrypted");271		let mut cmd = self.cmd("fleet-install-secrets").await?;272		cmd.arg("decrypt").eqarg("--secret", data.to_string());273		let encoded = cmd274			.sudo()275			.run_string()276			.await277			.context("failed to call remote host for decrypt")?;278		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;279		ensure!(!data.encrypted, "secret came out encrypted");280		Ok(data.data)281	}282	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {283		ensure!(data.encrypted, "secret is not encrypted");284		let mut cmd = self.cmd("fleet-install-secrets").await?;285		cmd.arg("reencrypt").eqarg("--secret", data.to_string());286		for target in targets {287			let key = self.config.key(&target).await?;288			cmd.eqarg("--targets", key);289		}290		let encoded = cmd291			.sudo()292			.run_string()293			.await294			.context("failed to call remote host for decrypt")?;295		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;296		ensure!(data.encrypted, "secret came out not encrypted");297		Ok(data)298	}299	/// Returns path for futureproofing, as path might change i.e on conversion to CA300	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {301		if self.local {302			// Path is located locally, thus already trusted.303			return Ok(path.to_owned());304		}305		let mut nix = MyCommand::new(306			// Not used307			EscalationStrategy::Su,308			"nix",309		);310		nix.arg("copy").arg("--substitute-on-destination");311312		match self.deploy_kind().await? {313			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {314				nix.comparg("--to", format!("ssh-ng://{}", self.name));315			}316			DeployKind::NixosInstall => {317				nix318					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon319					.arg("--no-check-sigs")320					.comparg(321						"--to",322						format!("ssh-ng://root@{}-install?remote-store=/mnt", self.name),323					);324			}325		}326		nix.arg(path);327		nix.run_nix().await.context("nix copy")?;328		Ok(path.to_owned())329	}330	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {331		let mut cmd = self.cmd("systemctl").await?;332		cmd.arg("stop").arg(name);333		cmd.sudo().run().await334	}335	pub async fn systemctl_start(&self, name: &str) -> Result<()> {336		let mut cmd = self.cmd("systemctl").await?;337		cmd.arg("start").arg(name);338		cmd.sudo().run().await339	}340341	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {342		let mut cmd = self.cmd("rm").await?;343		cmd.arg("-f").arg(path);344		if sudo {345			cmd = cmd.sudo()346		}347		cmd.run().await348	}349}350impl ConfigHost {351	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,352	// assuming getting tags always returns the same value.353	pub async fn tags(&self) -> Result<Vec<String>> {354		if let Some(v) = self.groups.get() {355			return Ok(v.clone());356		}357		let Some(host_config) = &self.host_config else {358			return Ok(vec![]);359		};360		let tags: Vec<String> = nix_go_json!(host_config.tags);361362		let _ = self.groups.set(tags.clone());363364		Ok(tags)365	}366	pub async fn nixos_config(&self) -> Result<Value> {367		if let Some(v) = self.nixos_config.get() {368			return Ok(v.clone());369		}370		let Some(host_config) = &self.host_config else {371			bail!("local host has no nixos_config");372		};373		let nixos_config = nix_go!(host_config.nixos.config);374		assert_warn("nixos config evaluation", &nixos_config).await?;375376		let _ = self.nixos_config.set(nixos_config.clone());377378		Ok(nixos_config)379	}380381	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {382		let nixos = self.nixos_config().await?;383		let secrets = nix_go!(nixos.secrets);384		let mut out = Vec::new();385		for name in secrets.list_fields().await? {386			let secret = nix_go!(secrets[{ name }]);387			let is_shared: bool = nix_go_json!(secret.shared);388			if is_shared {389				continue;390			}391			out.push(name);392		}393		Ok(out)394	}395	pub async fn secret_field(&self, name: &str) -> Result<Value> {396		let nixos = self.nixos_config().await?;397		Ok(nix_go!(nixos.secrets[{ name }]))398	}399400	/// Packages for this host, resolved with nixpkgs overlays401	pub async fn pkgs(&self) -> Result<Value> {402		if let Some(value) = &self.pkgs_override {403			return Ok(value.clone());404		}405		let Some(host_config) = &self.host_config else {406			bail!("local host has no host_config");407		};408		// TODO: Should nixos.options be cached?409		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))410	}411}412413impl Config {414	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {415		let config = &self.config_field;416		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);417		Ok(tagged)418	}419	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {420		let mut out = BTreeSet::new();421		for owner in owners {422			if let Some(tag) = owner.strip_prefix('@') {423				let hosts = self.tagged_hostnames(tag).await?;424				out.extend(hosts);425			} else {426				out.insert(owner);427			}428		}429		Ok(out)430	}431	pub fn local_host(&self) -> ConfigHost {432		ConfigHost {433			config: self.clone(),434			name: "<virtual localhost>".to_owned(),435			host_config: None,436			nixos_config: OnceCell::new(),437			groups: {438				let cell = OnceCell::new();439				let _ = cell.set(vec![]);440				cell441			},442			pkgs_override: Some(self.default_pkgs.clone()),443444			local: true,445			session: OnceLock::new(),446			deploy_kind: OnceCell::new(),447		}448	}449450	pub async fn host(&self, name: &str) -> Result<ConfigHost> {451		let config = &self.config_field;452		let host_config = nix_go!(config.hosts[{ name }]);453454		Ok(ConfigHost {455			config: self.clone(),456			name: name.to_owned(),457			host_config: Some(host_config),458			nixos_config: OnceCell::new(),459			groups: OnceCell::new(),460			pkgs_override: None,461462			// TODO: Remove with connectivit refactor463			local: self.localhost == name,464			session: OnceLock::new(),465			deploy_kind: OnceCell::new(),466		})467	}468	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {469		let config = &self.config_field;470		let names = nix_go!(config.hosts).list_fields().await?;471		let mut out = vec![];472		for name in names {473			out.push(self.host(&name).await?);474		}475		Ok(out)476	}477	// TODO: Replace usages with .host().nixos_config478	pub async fn system_config(&self, host: &str) -> Result<Value> {479		let fleet_field = &self.config_field;480		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))481	}482483	/// Shared secrets configured in fleet.nix or in flake484	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {485		let config_field = &self.config_field;486		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)487	}488	/// Shared secrets configured in fleet.nix489	pub fn list_shared(&self) -> Vec<String> {490		let data = self.data();491		data.shared_secrets.keys().cloned().collect()492	}493	pub fn has_shared(&self, name: &str) -> bool {494		let data = self.data();495		data.shared_secrets.contains_key(name)496	}497	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {498		let mut data = self.data_mut();499		data.shared_secrets.insert(name.to_owned(), shared);500	}501	pub fn remove_shared(&self, secret: &str) {502		let mut data = self.data_mut();503		data.shared_secrets.remove(secret);504	}505506	pub fn list_secrets(&self, host: &str) -> Vec<String> {507		let data = self.data();508		let Some(secrets) = data.host_secrets.get(host) else {509			return Vec::new();510		};511		secrets.keys().cloned().collect()512	}513514	pub fn has_secret(&self, host: &str, secret: &str) -> bool {515		let data = self.data();516		let Some(host_secrets) = data.host_secrets.get(host) else {517			return false;518		};519		host_secrets.contains_key(secret)520	}521	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {522		let mut data = self.data_mut();523		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();524		host_secrets.insert(secret, value);525	}526527	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {528		let data = self.data();529		let Some(host_secrets) = data.host_secrets.get(host) else {530			bail!("no secrets for machine {host}");531		};532		let Some(secret) = host_secrets.get(secret) else {533			bail!("machine {host} has no secret {secret}");534		};535		Ok(secret.clone())536	}537	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {538		let data = self.data();539		let Some(secret) = data.shared_secrets.get(secret) else {540			bail!("no shared secret {secret}");541		};542		Ok(secret.clone())543	}544	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {545		let config_field = &self.config_field;546		Ok(nix_go_json!(547			config_field.sharedSecrets[{ secret }].expectedOwners548		))549	}550551	// TODO: Should this be something modifiable from other processes?552	// E.g terraform provider might want to update FleetData (e.g secrets),553	// and current implementation assumes only one process holds current fleet.nix554	// Given that it is no longer needs to be a file for nix evaluation,555	// maybe it can be a .nix file for persistence, but accessible only556	// thru some shared state controller? Might it be stored in terraform557	// state provider?558	pub fn data(&self) -> MutexGuard<FleetData> {559		self.data.lock().unwrap()560	}561	pub fn data_mut(&self) -> MutexGuard<FleetData> {562		self.data.lock().unwrap()563	}564	pub fn save(&self) -> Result<()> {565		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;566		let data = nixlike::serialize(&self.data() as &FleetData)?;567		tempfile.write_all(568			format!(569				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",570				data571			)572			.as_bytes(),573		)?;574		let mut fleet_data_path = self.directory.clone();575		fleet_data_path.push("fleet.nix");576		tempfile.persist(fleet_data_path)?;577		Ok(())578	}579}