git.delta.rocks / jrsonnet / refs/commits / 94ece5cae749

difftreelog

source

crates/fleet-base/src/host.rs20.7 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{Context, Result, anyhow, bail, ensure};14use fleet_shared::SecretData;15use nix_eval::{NixSession, Value, nix_go, nix_go_json, util::assert_warn};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tabled::Tabled;19use tempfile::NamedTempFile;20use time::{UtcDateTime, format_description};21use tracing::warn;2223use crate::{24	command::MyCommand,25	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},26};2728pub struct FleetConfigInternals {29	/// Fleet project directory, containing fleet.nix file.30	pub directory: PathBuf,31	/// builtins.currentSystem32	pub local_system: String,33	pub data: Mutex<FleetData>,34	pub nix_args: Vec<OsString>,35	/// fleet_config.config36	pub config_field: Value,37	// TODO: Remove with connectivity refactor38	pub localhost: String,3940	/// import nixpkgs {system = local};41	pub default_pkgs: Value,42	/// inputs.nixpkgs43	pub nixpkgs: Value,4445	pub nix_session: NixSession,46}4748// TODO: Make field not pub49#[derive(Clone)]50pub struct Config(pub Arc<FleetConfigInternals>);5152impl Deref for Config {53	type Target = FleetConfigInternals;5455	fn deref(&self) -> &Self::Target {56		&self.057	}58}5960#[derive(Clone, Copy, Debug)]61pub enum EscalationStrategy {62	Sudo,63	Run0,64	Su,65}6667#[derive(Clone, PartialEq, Copy, Debug)]68pub enum DeployKind {69	/// NixOS => NixOS managed by fleet70	UpgradeToFleet,71	/// NixOS managed by fleet => NixOS managed by fleet72	Fleet,73	/// Remote host has /mnt, /mnt/boot mounted,74	/// generated config is added to fleet configuration.75	NixosInstall,76	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),77	/// generated config is added to fleet configuration,78	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.79	NixosLustrate,80}8182impl FromStr for DeployKind {83	type Err = anyhow::Error;84	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {85		match s {86			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),87			"fleet" => Ok(Self::Fleet),88			"nixos-install" => Ok(Self::NixosInstall),89			"nixos-lustrate" => Ok(Self::NixosLustrate),90			v => bail!(91				"unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""92			),93		}94	}95}96pub struct ConfigHost {97	config: Config,98	pub name: String,99	groups: OnceCell<Vec<String>>,100101	deploy_kind: OnceCell<DeployKind>,102103	pub host_config: Option<Value>,104	pub nixos_config: OnceCell<Value>,105	pub nixos_unchecked_config: OnceCell<Value>,106	pub pkgs_override: Option<Value>,107108	// TODO: Move command helpers away with connectivity refactor109	pub local: bool,110	pub session: OnceLock<Arc<openssh::Session>>,111}112113#[derive(Debug, Clone, Copy)]114pub enum GenerationStorage {115	Deployer,116	Machine,117	Pusher,118}119impl GenerationStorage {120	fn prefix(&self) -> &'static str {121		match self {122			GenerationStorage::Deployer => "deployer.",123			GenerationStorage::Machine => "",124			GenerationStorage::Pusher => "pusher.",125		}126	}127}128129#[derive(Tabled, Debug)]130pub struct Generation {131	#[tabled(rename = "ID", format("{}", self.rollback_id()))]132	pub id: u32,133	#[tabled(rename = "Current")]134	pub current: bool,135	#[tabled(rename = "Created at")]136	pub datetime: UtcDateTime,137	#[tabled(format = "{:?}")]138	pub store_path: PathBuf,139	#[tabled(skip)]140	pub location: GenerationStorage,141}142impl Generation {143	pub fn rollback_id(&self) -> String {144		format!("{}{}", self.location.prefix(), self.id)145	}146}147148fn parse_generation_line(g: &str) -> Option<Generation> {149	let mut parts = g.split_whitespace();150	let id = parts.next()?;151	let id: u32 = id.parse().ok()?;152	let date = parts.next()?;153	let time = parts.next()?;154	let current = if let Some(current) = parts.next() {155		if current == "(current)" {156			Some(true)157		} else {158			None159		}160	} else {161		Some(false)162	};163	let current = current?;164	if parts.next().is_some() {165		warn!("unexpected text after generation: {g}");166	}167168	let format = format_description::parse("[year]-[month]-[day] [hour]:[minute]:[second]")169		.expect("valid format");170	let datetime = UtcDateTime::parse(&format!("{date} {time}"), &format).ok()?;171172	Some(Generation {173		id,174		current,175		datetime,176		store_path: PathBuf::new(),177		location: GenerationStorage::Machine,178	})179}180// TODO: Move command helpers away with connectivity refactor181impl ConfigHost {182	pub async fn list_generations(&self, profile: &str) -> Result<Vec<Generation>> {183		let mut cmd = self.cmd("nix-env").await?;184		cmd.comparg("--profile", format!("/nix/var/nix/profiles/{profile}"))185			.arg("--list-generations")186			.env("TZ", "UTC");187		// Sudo is required because --list-generations tries to acquire profile lock188		let data = cmd.sudo().run_string().await?;189		let mut generations = data190			.split('\n')191			.map(|e| e.trim())192			.filter(|&l| !l.is_empty())193			.filter_map(|g| {194				let generation = parse_generation_line(g);195				if generation.is_none() {196					warn!("bad generation: {g}");197				};198				generation199			})200			.collect::<Vec<_>>();201		for ele in generations.iter_mut() {202			let mut cmd = self.cmd("readlink").await?;203			cmd.arg("--")204				.arg(format!("/nix/var/nix/profiles/{profile}-{}-link", ele.id));205			let path = cmd.run_string().await?;206			ele.store_path = PathBuf::from(path.trim_end_matches("\n"));207		}208209		Ok(generations)210	}211212	pub fn set_deploy_kind(&self, kind: DeployKind) {213		self.deploy_kind214			.set(kind)215			.ok()216			.expect("deploy kind is already set");217	}218	pub async fn deploy_kind(&self) -> Result<DeployKind> {219		if let Some(kind) = self.deploy_kind.get() {220			return Ok(kind.clone());221		}222		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {223			Ok(v) => v,224			Err(e) => {225				bail!("failed to query remote system kind: {}", e);226			}227		};228		if !is_fleet_managed {229			bail!(indoc::indoc! {"230				host is not marked as managed by fleet231				if you're not trying to lustrate/install system from scratch,232				you should either233					1. manually create /etc/FLEET_HOST file on the target host,234					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet235					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos236			"});237		}238		// TOCTOU is possible239		let _ = self.deploy_kind.set(DeployKind::Fleet);240		Ok(self241			.deploy_kind242			.get()243			.expect("deploy kind is just set")244			.clone())245	}246	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {247		// Prefer sudo, as run0 has some gotchas with polkit248		// and too many repeating prompts.249		if (self.find_in_path("sudo").await).is_ok() {250			return Ok(EscalationStrategy::Sudo);251		}252		if (self.find_in_path("run0").await).is_ok() {253			return Ok(EscalationStrategy::Run0);254		}255		Ok(EscalationStrategy::Su)256	}257	async fn open_session(&self) -> Result<Arc<openssh::Session>> {258		assert!(!self.local, "do not open ssh connection to local session");259		// FIXME: TOCTOU260		if let Some(session) = &self.session.get() {261			return Ok((*session).clone());262		};263		let session = SessionBuilder::default();264		let session = session265			.connect(&self.name)266			.await267			.map_err(|e| anyhow!("ssh error while connecting to {}: {e:#?}", self.name))?;268		let session = Arc::new(session);269		self.session.set(session.clone()).expect("TOCTOU happened");270		Ok(session)271	}272	pub async fn mktemp_dir(&self) -> Result<String> {273		let mut cmd = self.cmd("mktemp").await?;274		cmd.arg("-d");275		let path = cmd.run_string().await?;276		Ok(path.trim_end().to_owned())277	}278	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {279		let mut cmd = self.cmd("sh").await?;280		cmd.arg("-c")281			.arg("test -e \"$1\" && echo true || echo false")282			.arg("_")283			.arg(path);284		Ok(cmd.run_value().await?)285	}286	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {287		let mut cmd = self.cmd("cat").await?;288		cmd.arg(path);289		cmd.run_bytes().await290	}291	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {292		let mut cmd = self.cmd("cat").await?;293		cmd.arg(path);294		cmd.run_string().await295	}296	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {297		let mut cmd = self.cmd("ls").await?;298		cmd.arg(path);299		let out = cmd.run_string().await?;300		let mut lines = out.split('\n');301		if let Some(last) = lines.next_back() {302			ensure!(last.is_empty(), "output of ls should end with newline");303		}304		Ok(lines.map(ToOwned::to_owned).collect())305	}306	#[allow(dead_code)]307	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {308		let text = self.read_file_text(path).await?;309		Ok(serde_json::from_str(&text)?)310	}311	pub async fn read_env(&self, env: &str) -> Result<String> {312		let mut cmd = self.cmd("printenv").await?;313		cmd.arg(env);314		cmd.run_string().await315	}316	pub async fn find_in_path(&self, command: &str) -> Result<String> {317		// // `which` is not a part of coreutils, and it might not exist on machine.318		// let path = self.read_env("PATH").await?;319		// // Assuming delimiter is :, we don't work with windows host, this check will be much320		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)321		// for ele in path.split(':') {322		// 	let test_path = format!("{ele}/{cmd}");323		// 	test -x etc324		// }325		// let mut cmd = self.cmd("printenv").await?;326		// cmd.arg(env);327		// Ok(cmd.run_string().await?)328		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.329		let mut cmd = self330			.cmd_escalation(331				// Not used332				EscalationStrategy::Su,333				"which",334			)335			.await?;336		cmd.arg(command);337		cmd.run_string().await338	}339	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>340	where341		<D as FromStr>::Err: Display,342	{343		let text = self.read_file_text(path).await?;344		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))345	}346	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {347		self.cmd_escalation(self.escalation_strategy().await?, cmd)348			.await349	}350	pub async fn cmd_escalation(351		&self,352		escalation: EscalationStrategy,353		cmd: impl AsRef<OsStr>,354	) -> Result<MyCommand> {355		if self.local {356			Ok(MyCommand::new(escalation, cmd))357		} else {358			let session = self.open_session().await?;359			Ok(MyCommand::new_on(escalation, cmd, session))360		}361	}362	pub async fn nix_cmd(&self) -> Result<MyCommand> {363		let mut nix = self.cmd("nix").await?;364		nix.args([365			"--extra-experimental-features",366			"nix-command",367			"--extra-experimental-features",368			"flakes",369		]);370		Ok(nix)371	}372373	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {374		ensure!(data.encrypted, "secret is not encrypted");375		let mut cmd = self.cmd("fleet-install-secrets").await?;376		cmd.arg("decrypt").eqarg("--secret", data.to_string());377		let encoded = cmd378			.sudo()379			.run_string()380			.await381			.context("failed to call remote host for decrypt")?;382		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;383		ensure!(!data.encrypted, "secret came out encrypted");384		Ok(data.data)385	}386	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {387		ensure!(data.encrypted, "secret is not encrypted");388		let mut cmd = self.cmd("fleet-install-secrets").await?;389		cmd.arg("reencrypt").eqarg("--secret", data.to_string());390		for target in targets {391			let key = self.config.key(&target).await?;392			cmd.eqarg("--targets", key);393		}394		let encoded = cmd395			.sudo()396			.run_string()397			.await398			.context("failed to call remote host for decrypt")?;399		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;400		ensure!(data.encrypted, "secret came out not encrypted");401		Ok(data)402	}403	/// Returns path for futureproofing, as path might change i.e on conversion to CA404	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {405		if self.local {406			// Path is located locally, thus already trusted.407			return Ok(path.to_owned());408		}409		let mut nix = MyCommand::new(410			// Not used411			EscalationStrategy::Su,412			"nix",413		);414		nix.arg("copy").arg("--substitute-on-destination");415416		match self.deploy_kind().await? {417			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {418				nix.comparg("--to", format!("ssh-ng://{}", self.name));419			}420			DeployKind::NixosInstall => {421				nix422					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon423					.arg("--no-check-sigs")424					.comparg(425						"--to",426						format!("ssh-ng://root@{}?remote-store=/mnt", self.name),427					);428			}429		}430		nix.arg(path);431		nix.run_nix().await.context("nix copy")?;432		Ok(path.to_owned())433	}434	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {435		let mut cmd = self.cmd("systemctl").await?;436		cmd.arg("stop").arg(name);437		cmd.sudo().run().await438	}439	pub async fn systemctl_start(&self, name: &str) -> Result<()> {440		let mut cmd = self.cmd("systemctl").await?;441		cmd.arg("start").arg(name);442		cmd.sudo().run().await443	}444445	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {446		let mut cmd = self.cmd("rm").await?;447		cmd.arg("-f").arg(path);448		if sudo {449			cmd = cmd.sudo()450		}451		cmd.run().await452	}453}454impl ConfigHost {455	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,456	// assuming getting tags always returns the same value.457	pub async fn tags(&self) -> Result<Vec<String>> {458		if let Some(v) = self.groups.get() {459			return Ok(v.clone());460		}461		let Some(host_config) = &self.host_config else {462			return Ok(vec![]);463		};464		let tags: Vec<String> = nix_go_json!(host_config.tags);465466		let _ = self.groups.set(tags.clone());467468		Ok(tags)469	}470	pub async fn nixos_config(&self) -> Result<Value> {471		if let Some(v) = self.nixos_config.get() {472			return Ok(v.clone());473		}474		let Some(host_config) = &self.host_config else {475			bail!("local host has no nixos_config");476		};477		let nixos_config = nix_go!(host_config.nixos.config);478		assert_warn("nixos config evaluation", &nixos_config).await?;479480		let _ = self.nixos_config.set(nixos_config.clone());481482		Ok(nixos_config)483	}484	pub async fn nixos_unchecked_config(&self) -> Result<Value> {485		if let Some(v) = self.nixos_unchecked_config.get() {486			return Ok(v.clone());487		}488		let Some(host_config) = &self.host_config else {489			bail!("local host has no nixos_config");490		};491		let nixos_config = nix_go!(host_config.nixos_unchecked.config);492493		let _ = self.nixos_unchecked_config.set(nixos_config.clone());494495		Ok(nixos_config)496	}497498	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {499		let nixos = self.nixos_unchecked_config().await?;500		let secrets = nix_go!(nixos.secrets);501		let mut out = Vec::new();502		for name in secrets.list_fields().await? {503			let secret = nix_go!(secrets[{ name }]);504			let is_shared: bool = nix_go_json!(secret.shared);505			if is_shared {506				continue;507			}508			out.push(name);509		}510		Ok(out)511	}512	pub async fn secret_field(&self, name: &str) -> Result<Value> {513		let nixos = self.nixos_unchecked_config().await?;514		Ok(nix_go!(nixos.secrets[{ name }]))515	}516517	/// Packages for this host, resolved with nixpkgs overlays518	pub async fn pkgs(&self) -> Result<Value> {519		if let Some(value) = &self.pkgs_override {520			return Ok(value.clone());521		}522		let Some(host_config) = &self.host_config else {523			bail!("local host has no host_config");524		};525		// TODO: Should nixos.options be cached?526		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))527	}528}529530impl Config {531	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {532		let config = &self.config_field;533		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);534		Ok(tagged)535	}536	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {537		let mut out = BTreeSet::new();538		for owner in owners {539			if let Some(tag) = owner.strip_prefix('@') {540				let hosts = self.tagged_hostnames(tag).await?;541				out.extend(hosts);542			} else {543				out.insert(owner);544			}545		}546		Ok(out)547	}548	pub fn local_host(&self) -> ConfigHost {549		ConfigHost {550			config: self.clone(),551			name: "<virtual localhost>".to_owned(),552			host_config: None,553			nixos_config: OnceCell::new(),554			nixos_unchecked_config: OnceCell::new(),555			groups: {556				let cell = OnceCell::new();557				let _ = cell.set(vec![]);558				cell559			},560			pkgs_override: Some(self.default_pkgs.clone()),561562			local: true,563			session: OnceLock::new(),564			deploy_kind: OnceCell::new(),565		}566	}567568	pub async fn host(&self, name: &str) -> Result<ConfigHost> {569		let config = &self.config_field;570		let host_config = nix_go!(config.hosts[{ name }]);571572		Ok(ConfigHost {573			config: self.clone(),574			name: name.to_owned(),575			host_config: Some(host_config),576			nixos_config: OnceCell::new(),577			nixos_unchecked_config: OnceCell::new(),578			groups: OnceCell::new(),579			pkgs_override: None,580581			// TODO: Remove with connectivit refactor582			local: self.localhost == name,583			session: OnceLock::new(),584			deploy_kind: OnceCell::new(),585		})586	}587	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {588		let config = &self.config_field;589		let names = nix_go!(config.hosts).list_fields().await?;590		let mut out = vec![];591		for name in names {592			out.push(self.host(&name).await?);593		}594		Ok(out)595	}596	// TODO: Replace usages with .host().nixos_config597	pub async fn system_config(&self, host: &str) -> Result<Value> {598		let fleet_field = &self.config_field;599		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))600	}601602	/// Shared secrets configured in fleet.nix or in flake603	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {604		let config_field = &self.config_field;605		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)606	}607	/// Shared secrets configured in fleet.nix608	pub fn list_shared(&self) -> Vec<String> {609		let data = self.data();610		data.shared_secrets.keys().cloned().collect()611	}612	pub fn has_shared(&self, name: &str) -> bool {613		let data = self.data();614		data.shared_secrets.contains_key(name)615	}616	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {617		let mut data = self.data_mut();618		data.shared_secrets.insert(name.to_owned(), shared);619	}620	pub fn remove_shared(&self, secret: &str) {621		let mut data = self.data_mut();622		data.shared_secrets.remove(secret);623	}624625	pub fn list_secrets(&self, host: &str) -> Vec<String> {626		let data = self.data();627		let Some(secrets) = data.host_secrets.get(host) else {628			return Vec::new();629		};630		secrets.keys().cloned().collect()631	}632633	pub fn has_secret(&self, host: &str, secret: &str) -> bool {634		let data = self.data();635		let Some(host_secrets) = data.host_secrets.get(host) else {636			return false;637		};638		host_secrets.contains_key(secret)639	}640	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {641		let mut data = self.data_mut();642		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();643		host_secrets.insert(secret, value);644	}645646	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {647		let data = self.data();648		let Some(host_secrets) = data.host_secrets.get(host) else {649			bail!("no secrets for machine {host}");650		};651		let Some(secret) = host_secrets.get(secret) else {652			bail!("machine {host} has no secret {secret}");653		};654		Ok(secret.clone())655	}656	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {657		let data = self.data();658		let Some(secret) = data.shared_secrets.get(secret) else {659			bail!("no shared secret {secret}");660		};661		Ok(secret.clone())662	}663	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {664		let config_field = &self.config_field;665		Ok(nix_go_json!(666			config_field.sharedSecrets[{ secret }].expectedOwners667		))668	}669670	// TODO: Should this be something modifiable from other processes?671	// E.g terraform provider might want to update FleetData (e.g secrets),672	// and current implementation assumes only one process holds current fleet.nix673	// Given that it is no longer needs to be a file for nix evaluation,674	// maybe it can be a .nix file for persistence, but accessible only675	// thru some shared state controller? Might it be stored in terraform676	// state provider?677	pub fn data(&self) -> MutexGuard<FleetData> {678		self.data.lock().unwrap()679	}680	pub fn data_mut(&self) -> MutexGuard<FleetData> {681		self.data.lock().unwrap()682	}683	pub fn save(&self) -> Result<()> {684		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;685		let data = nixlike::serialize(&self.data() as &FleetData)?;686		tempfile.write_all(687			format!(688				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",689				data690			)691			.as_bytes(),692		)?;693		let mut fleet_data_path = self.directory.clone();694		fleet_data_path.push("fleet.nix");695		tempfile.persist(fleet_data_path)?;696		Ok(())697	}698}