git.delta.rocks / jrsonnet / refs/commits / 1470de8a447c

difftreelog

source

crates/fleet-base/src/host.rs20.7 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{anyhow, bail, ensure, Context, Result};14use fleet_shared::SecretData;15use nix_eval::{nix_go, nix_go_json, util::assert_warn, NixSession, Value};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tabled::Tabled;19use tempfile::NamedTempFile;20use time::{format_description, UtcDateTime};21use tracing::warn;2223use crate::{24	command::MyCommand,25	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},26};2728pub struct FleetConfigInternals {29	/// Fleet project directory, containing fleet.nix file.30	pub directory: PathBuf,31	/// builtins.currentSystem32	pub local_system: String,33	pub data: Mutex<FleetData>,34	pub nix_args: Vec<OsString>,35	/// fleet_config.config36	pub config_field: Value,37	// TODO: Remove with connectivity refactor38	pub localhost: String,3940	/// import nixpkgs {system = local};41	pub default_pkgs: Value,42	/// inputs.nixpkgs43	pub nixpkgs: Value,4445	pub nix_session: NixSession,46}4748// TODO: Make field not pub49#[derive(Clone)]50pub struct Config(pub Arc<FleetConfigInternals>);5152impl Deref for Config {53	type Target = FleetConfigInternals;5455	fn deref(&self) -> &Self::Target {56		&self.057	}58}5960#[derive(Clone, Copy, Debug)]61pub enum EscalationStrategy {62	Sudo,63	Run0,64	Su,65}6667#[derive(Clone, PartialEq, Copy, Debug)]68pub enum DeployKind {69	/// NixOS => NixOS managed by fleet70	UpgradeToFleet,71	/// NixOS managed by fleet => NixOS managed by fleet72	Fleet,73	/// Remote host has /mnt, /mnt/boot mounted,74	/// generated config is added to fleet configuration.75	NixosInstall,76	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),77	/// generated config is added to fleet configuration,78	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.79	NixosLustrate,80}8182impl FromStr for DeployKind {83	type Err = anyhow::Error;84	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {85		match s {86			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),87			"fleet" => Ok(Self::Fleet),88			"nixos-install" => Ok(Self::NixosInstall),89			"nixos-lustrate" => Ok(Self::NixosLustrate),90			v => bail!("unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""),91		}92	}93}94pub struct ConfigHost {95	config: Config,96	pub name: String,97	groups: OnceCell<Vec<String>>,9899	deploy_kind: OnceCell<DeployKind>,100101	pub host_config: Option<Value>,102	pub nixos_config: OnceCell<Value>,103	pub nixos_unchecked_config: OnceCell<Value>,104	pub pkgs_override: Option<Value>,105106	// TODO: Move command helpers away with connectivity refactor107	pub local: bool,108	pub session: OnceLock<Arc<openssh::Session>>,109}110111#[derive(Debug, Clone, Copy)]112pub enum GenerationStorage {113	Deployer,114	Machine,115	Pusher,116}117impl GenerationStorage {118	fn prefix(&self) -> &'static str {119		match self {120			GenerationStorage::Deployer => "deployer.",121			GenerationStorage::Machine => "",122			GenerationStorage::Pusher => "pusher.",123		}124	}125}126127#[derive(Tabled, Debug)]128pub struct Generation {129	#[tabled(rename = "ID", format("{}", self.rollback_id()))]130	pub id: u32,131	#[tabled(rename = "Current")]132	pub current: bool,133	#[tabled(rename = "Created at")]134	pub datetime: UtcDateTime,135	#[tabled(format = "{:?}")]136	pub store_path: PathBuf,137	#[tabled(skip)]138	pub location: GenerationStorage,139}140impl Generation {141	pub fn rollback_id(&self) -> String {142		format!("{}{}", self.location.prefix(), self.id)143	}144}145146fn parse_generation_line(g: &str) -> Option<Generation> {147	let mut parts = g.split_whitespace();148	let id = parts.next()?;149	let id: u32 = id.parse().ok()?;150	let date = parts.next()?;151	let time = parts.next()?;152	let current = if let Some(current) = parts.next() {153		if current == "(current)" {154			Some(true)155		} else {156			None157		}158	} else {159		Some(false)160	};161	let current = current?;162	if parts.next().is_some() {163		warn!("unexpected text after generation: {g}");164	}165166	let format = format_description::parse("[year]-[month]-[day] [hour]:[minute]:[second]")167		.expect("valid format");168	let datetime = UtcDateTime::parse(&format!("{date} {time}"), &format).ok()?;169170	Some(Generation {171		id,172		current,173		datetime,174		store_path: PathBuf::new(),175		location: GenerationStorage::Machine,176	})177}178// TODO: Move command helpers away with connectivity refactor179impl ConfigHost {180	pub async fn list_generations(&self, profile: &str) -> Result<Vec<Generation>> {181		let mut cmd = self.cmd("nix-env").await?;182		cmd.comparg("--profile", format!("/nix/var/nix/profiles/{profile}"))183			.arg("--list-generations")184			.env("TZ", "UTC");185		// Sudo is required because --list-generations tries to acquire profile lock186		let data = cmd.sudo().run_string().await?;187		let mut generations = data188			.split('\n')189			.map(|e| e.trim())190			.filter(|&l| !l.is_empty())191			.filter_map(|g| {192				let gen = parse_generation_line(g);193				if gen.is_none() {194					warn!("bad generation: {g}");195				};196				gen197			})198			.collect::<Vec<_>>();199		for ele in generations.iter_mut() {200			let mut cmd = self.cmd("readlink").await?;201			cmd.arg("--")202				.arg(format!("/nix/var/nix/profiles/{profile}-{}-link", ele.id));203			let path = cmd.run_string().await?;204			ele.store_path = PathBuf::from(path.trim_end_matches("\n"));205		}206207		Ok(generations)208	}209210	pub fn set_deploy_kind(&self, kind: DeployKind) {211		self.deploy_kind212			.set(kind)213			.ok()214			.expect("deploy kind is already set");215	}216	pub async fn deploy_kind(&self) -> Result<DeployKind> {217		if let Some(kind) = self.deploy_kind.get() {218			return Ok(kind.clone());219		}220		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {221			Ok(v) => v,222			Err(e) => {223				bail!("failed to query remote system kind: {}", e);224			}225		};226		if !is_fleet_managed {227			bail!(indoc::indoc! {"228				host is not marked as managed by fleet229				if you're not trying to lustrate/install system from scratch,230				you should either231					1. manually create /etc/FLEET_HOST file on the target host,232					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet233					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos234			"});235		}236		// TOCTOU is possible237		let _ = self.deploy_kind.set(DeployKind::Fleet);238		Ok(self239			.deploy_kind240			.get()241			.expect("deploy kind is just set")242			.clone())243	}244	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {245		// Prefer sudo, as run0 has some gotchas with polkit246		// and too many repeating prompts.247		if (self.find_in_path("sudo").await).is_ok() {248			return Ok(EscalationStrategy::Sudo);249		}250		if (self.find_in_path("run0").await).is_ok() {251			return Ok(EscalationStrategy::Run0);252		}253		Ok(EscalationStrategy::Su)254	}255	async fn open_session(&self) -> Result<Arc<openssh::Session>> {256		assert!(!self.local, "do not open ssh connection to local session");257		// FIXME: TOCTOU258		if let Some(session) = &self.session.get() {259			return Ok((*session).clone());260		};261		let session = SessionBuilder::default();262		let session = session263			.connect(&self.name)264			.await265			.map_err(|e| anyhow!("ssh error while connecting to {}: {e:#?}", self.name))?;266		let session = Arc::new(session);267		self.session.set(session.clone()).expect("TOCTOU happened");268		Ok(session)269	}270	pub async fn mktemp_dir(&self) -> Result<String> {271		let mut cmd = self.cmd("mktemp").await?;272		cmd.arg("-d");273		let path = cmd.run_string().await?;274		Ok(path.trim_end().to_owned())275	}276	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {277		let mut cmd = self.cmd("sh").await?;278		cmd.arg("-c")279			.arg("test -e \"$1\" && echo true || echo false")280			.arg("_")281			.arg(path);282		Ok(cmd.run_value().await?)283	}284	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {285		let mut cmd = self.cmd("cat").await?;286		cmd.arg(path);287		cmd.run_bytes().await288	}289	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {290		let mut cmd = self.cmd("cat").await?;291		cmd.arg(path);292		cmd.run_string().await293	}294	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {295		let mut cmd = self.cmd("ls").await?;296		cmd.arg(path);297		let out = cmd.run_string().await?;298		let mut lines = out.split('\n');299		if let Some(last) = lines.next_back() {300			ensure!(last.is_empty(), "output of ls should end with newline");301		}302		Ok(lines.map(ToOwned::to_owned).collect())303	}304	#[allow(dead_code)]305	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {306		let text = self.read_file_text(path).await?;307		Ok(serde_json::from_str(&text)?)308	}309	pub async fn read_env(&self, env: &str) -> Result<String> {310		let mut cmd = self.cmd("printenv").await?;311		cmd.arg(env);312		cmd.run_string().await313	}314	pub async fn find_in_path(&self, command: &str) -> Result<String> {315		// // `which` is not a part of coreutils, and it might not exist on machine.316		// let path = self.read_env("PATH").await?;317		// // Assuming delimiter is :, we don't work with windows host, this check will be much318		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)319		// for ele in path.split(':') {320		// 	let test_path = format!("{ele}/{cmd}");321		// 	test -x etc322		// }323		// let mut cmd = self.cmd("printenv").await?;324		// cmd.arg(env);325		// Ok(cmd.run_string().await?)326		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.327		let mut cmd = self328			.cmd_escalation(329				// Not used330				EscalationStrategy::Su,331				"which",332			)333			.await?;334		cmd.arg(command);335		cmd.run_string().await336	}337	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>338	where339		<D as FromStr>::Err: Display,340	{341		let text = self.read_file_text(path).await?;342		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))343	}344	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {345		self.cmd_escalation(self.escalation_strategy().await?, cmd)346			.await347	}348	pub async fn cmd_escalation(349		&self,350		escalation: EscalationStrategy,351		cmd: impl AsRef<OsStr>,352	) -> Result<MyCommand> {353		if self.local {354			Ok(MyCommand::new(escalation, cmd))355		} else {356			let session = self.open_session().await?;357			Ok(MyCommand::new_on(escalation, cmd, session))358		}359	}360	pub async fn nix_cmd(&self) -> Result<MyCommand> {361		let mut nix = self.cmd("nix").await?;362		nix.args([363			"--extra-experimental-features",364			"nix-command",365			"--extra-experimental-features",366			"flakes",367		]);368		Ok(nix)369	}370371	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {372		ensure!(data.encrypted, "secret is not encrypted");373		let mut cmd = self.cmd("fleet-install-secrets").await?;374		cmd.arg("decrypt").eqarg("--secret", data.to_string());375		let encoded = cmd376			.sudo()377			.run_string()378			.await379			.context("failed to call remote host for decrypt")?;380		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;381		ensure!(!data.encrypted, "secret came out encrypted");382		Ok(data.data)383	}384	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {385		ensure!(data.encrypted, "secret is not encrypted");386		let mut cmd = self.cmd("fleet-install-secrets").await?;387		cmd.arg("reencrypt").eqarg("--secret", data.to_string());388		for target in targets {389			let key = self.config.key(&target).await?;390			cmd.eqarg("--targets", key);391		}392		let encoded = cmd393			.sudo()394			.run_string()395			.await396			.context("failed to call remote host for decrypt")?;397		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;398		ensure!(data.encrypted, "secret came out not encrypted");399		Ok(data)400	}401	/// Returns path for futureproofing, as path might change i.e on conversion to CA402	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {403		if self.local {404			// Path is located locally, thus already trusted.405			return Ok(path.to_owned());406		}407		let mut nix = MyCommand::new(408			// Not used409			EscalationStrategy::Su,410			"nix",411		);412		nix.arg("copy").arg("--substitute-on-destination");413414		match self.deploy_kind().await? {415			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {416				nix.comparg("--to", format!("ssh-ng://{}", self.name));417			}418			DeployKind::NixosInstall => {419				nix420					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon421					.arg("--no-check-sigs")422					.comparg(423						"--to",424						format!("ssh-ng://root@{}?remote-store=/mnt", self.name),425					);426			}427		}428		nix.arg(path);429		nix.run_nix().await.context("nix copy")?;430		Ok(path.to_owned())431	}432	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {433		let mut cmd = self.cmd("systemctl").await?;434		cmd.arg("stop").arg(name);435		cmd.sudo().run().await436	}437	pub async fn systemctl_start(&self, name: &str) -> Result<()> {438		let mut cmd = self.cmd("systemctl").await?;439		cmd.arg("start").arg(name);440		cmd.sudo().run().await441	}442443	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {444		let mut cmd = self.cmd("rm").await?;445		cmd.arg("-f").arg(path);446		if sudo {447			cmd = cmd.sudo()448		}449		cmd.run().await450	}451}452impl ConfigHost {453	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,454	// assuming getting tags always returns the same value.455	pub async fn tags(&self) -> Result<Vec<String>> {456		if let Some(v) = self.groups.get() {457			return Ok(v.clone());458		}459		let Some(host_config) = &self.host_config else {460			return Ok(vec![]);461		};462		let tags: Vec<String> = nix_go_json!(host_config.tags);463464		let _ = self.groups.set(tags.clone());465466		Ok(tags)467	}468	pub async fn nixos_config(&self) -> Result<Value> {469		if let Some(v) = self.nixos_config.get() {470			return Ok(v.clone());471		}472		let Some(host_config) = &self.host_config else {473			bail!("local host has no nixos_config");474		};475		let nixos_config = nix_go!(host_config.nixos.config);476		assert_warn("nixos config evaluation", &nixos_config).await?;477478		let _ = self.nixos_config.set(nixos_config.clone());479480		Ok(nixos_config)481	}482	pub async fn nixos_unchecked_config(&self) -> Result<Value> {483		if let Some(v) = self.nixos_unchecked_config.get() {484			return Ok(v.clone());485		}486		let Some(host_config) = &self.host_config else {487			bail!("local host has no nixos_config");488		};489		let nixos_config = nix_go!(host_config.nixos_unchecked.config);490491		let _ = self.nixos_unchecked_config.set(nixos_config.clone());492493		Ok(nixos_config)494	}495496	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {497		let nixos = self.nixos_unchecked_config().await?;498		let secrets = nix_go!(nixos.secrets);499		let mut out = Vec::new();500		for name in secrets.list_fields().await? {501			let secret = nix_go!(secrets[{ name }]);502			let is_shared: bool = nix_go_json!(secret.shared);503			if is_shared {504				continue;505			}506			out.push(name);507		}508		Ok(out)509	}510	pub async fn secret_field(&self, name: &str) -> Result<Value> {511		let nixos = self.nixos_unchecked_config().await?;512		Ok(nix_go!(nixos.secrets[{ name }]))513	}514515	/// Packages for this host, resolved with nixpkgs overlays516	pub async fn pkgs(&self) -> Result<Value> {517		if let Some(value) = &self.pkgs_override {518			return Ok(value.clone());519		}520		let Some(host_config) = &self.host_config else {521			bail!("local host has no host_config");522		};523		// TODO: Should nixos.options be cached?524		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))525	}526}527528impl Config {529	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {530		let config = &self.config_field;531		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);532		Ok(tagged)533	}534	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {535		let mut out = BTreeSet::new();536		for owner in owners {537			if let Some(tag) = owner.strip_prefix('@') {538				let hosts = self.tagged_hostnames(tag).await?;539				out.extend(hosts);540			} else {541				out.insert(owner);542			}543		}544		Ok(out)545	}546	pub fn local_host(&self) -> ConfigHost {547		ConfigHost {548			config: self.clone(),549			name: "<virtual localhost>".to_owned(),550			host_config: None,551			nixos_config: OnceCell::new(),552			nixos_unchecked_config: OnceCell::new(),553			groups: {554				let cell = OnceCell::new();555				let _ = cell.set(vec![]);556				cell557			},558			pkgs_override: Some(self.default_pkgs.clone()),559560			local: true,561			session: OnceLock::new(),562			deploy_kind: OnceCell::new(),563		}564	}565566	pub async fn host(&self, name: &str) -> Result<ConfigHost> {567		let config = &self.config_field;568		let host_config = nix_go!(config.hosts[{ name }]);569570		Ok(ConfigHost {571			config: self.clone(),572			name: name.to_owned(),573			host_config: Some(host_config),574			nixos_config: OnceCell::new(),575			nixos_unchecked_config: OnceCell::new(),576			groups: OnceCell::new(),577			pkgs_override: None,578579			// TODO: Remove with connectivit refactor580			local: self.localhost == name,581			session: OnceLock::new(),582			deploy_kind: OnceCell::new(),583		})584	}585	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {586		let config = &self.config_field;587		let names = nix_go!(config.hosts).list_fields().await?;588		let mut out = vec![];589		for name in names {590			out.push(self.host(&name).await?);591		}592		Ok(out)593	}594	// TODO: Replace usages with .host().nixos_config595	pub async fn system_config(&self, host: &str) -> Result<Value> {596		let fleet_field = &self.config_field;597		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))598	}599600	/// Shared secrets configured in fleet.nix or in flake601	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {602		let config_field = &self.config_field;603		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)604	}605	/// Shared secrets configured in fleet.nix606	pub fn list_shared(&self) -> Vec<String> {607		let data = self.data();608		data.shared_secrets.keys().cloned().collect()609	}610	pub fn has_shared(&self, name: &str) -> bool {611		let data = self.data();612		data.shared_secrets.contains_key(name)613	}614	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {615		let mut data = self.data_mut();616		data.shared_secrets.insert(name.to_owned(), shared);617	}618	pub fn remove_shared(&self, secret: &str) {619		let mut data = self.data_mut();620		data.shared_secrets.remove(secret);621	}622623	pub fn list_secrets(&self, host: &str) -> Vec<String> {624		let data = self.data();625		let Some(secrets) = data.host_secrets.get(host) else {626			return Vec::new();627		};628		secrets.keys().cloned().collect()629	}630631	pub fn has_secret(&self, host: &str, secret: &str) -> bool {632		let data = self.data();633		let Some(host_secrets) = data.host_secrets.get(host) else {634			return false;635		};636		host_secrets.contains_key(secret)637	}638	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {639		let mut data = self.data_mut();640		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();641		host_secrets.insert(secret, value);642	}643644	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {645		let data = self.data();646		let Some(host_secrets) = data.host_secrets.get(host) else {647			bail!("no secrets for machine {host}");648		};649		let Some(secret) = host_secrets.get(secret) else {650			bail!("machine {host} has no secret {secret}");651		};652		Ok(secret.clone())653	}654	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {655		let data = self.data();656		let Some(secret) = data.shared_secrets.get(secret) else {657			bail!("no shared secret {secret}");658		};659		Ok(secret.clone())660	}661	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {662		let config_field = &self.config_field;663		Ok(nix_go_json!(664			config_field.sharedSecrets[{ secret }].expectedOwners665		))666	}667668	// TODO: Should this be something modifiable from other processes?669	// E.g terraform provider might want to update FleetData (e.g secrets),670	// and current implementation assumes only one process holds current fleet.nix671	// Given that it is no longer needs to be a file for nix evaluation,672	// maybe it can be a .nix file for persistence, but accessible only673	// thru some shared state controller? Might it be stored in terraform674	// state provider?675	pub fn data(&self) -> MutexGuard<FleetData> {676		self.data.lock().unwrap()677	}678	pub fn data_mut(&self) -> MutexGuard<FleetData> {679		self.data.lock().unwrap()680	}681	pub fn save(&self) -> Result<()> {682		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;683		let data = nixlike::serialize(&self.data() as &FleetData)?;684		tempfile.write_all(685			format!(686				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",687				data688			)689			.as_bytes(),690		)?;691		let mut fleet_data_path = self.directory.clone();692		fleet_data_path.push("fleet.nix");693		tempfile.persist(fleet_data_path)?;694		Ok(())695	}696}