git.delta.rocks / jrsonnet / refs/commits / 757475fe4cab

difftreelog

source

crates/fleet-base/src/host.rs21.1 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{Context, Result, anyhow, bail, ensure};14use fleet_shared::SecretData;15use nix_eval::{NixSession, Value, nix_go, nix_go_json, util::assert_warn};16use openssh::SessionBuilder;17use serde::de::DeserializeOwned;18use tabled::Tabled;19use tempfile::NamedTempFile;20use time::{UtcDateTime, format_description};21use tracing::warn;2223use crate::{24	command::MyCommand,25	fleetdata::{FleetData, FleetSecret, FleetSharedSecret},26};2728pub struct FleetConfigInternals {29	/// Fleet project directory, containing fleet.nix file.30	pub directory: PathBuf,31	/// builtins.currentSystem32	pub local_system: String,33	pub data: Mutex<FleetData>,34	pub nix_args: Vec<OsString>,35	/// fleet_config.config36	pub config_field: Value,37	// TODO: Remove with connectivity refactor38	pub localhost: String,3940	/// import nixpkgs {system = local};41	pub default_pkgs: Value,42	/// inputs.nixpkgs43	pub nixpkgs: Value,4445	pub nix_session: NixSession,46}4748// TODO: Make field not pub49#[derive(Clone)]50pub struct Config(pub Arc<FleetConfigInternals>);5152impl Deref for Config {53	type Target = FleetConfigInternals;5455	fn deref(&self) -> &Self::Target {56		&self.057	}58}5960#[derive(Clone, Copy, Debug)]61pub enum EscalationStrategy {62	Sudo,63	Run0,64	Su,65}6667#[derive(Clone, PartialEq, Copy, Debug)]68pub enum DeployKind {69	/// NixOS => NixOS managed by fleet70	UpgradeToFleet,71	/// NixOS managed by fleet => NixOS managed by fleet72	Fleet,73	/// Remote host has /mnt, /mnt/boot mounted,74	/// generated config is added to fleet configuration.75	NixosInstall,76	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),77	/// generated config is added to fleet configuration,78	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.79	NixosLustrate,80}8182impl FromStr for DeployKind {83	type Err = anyhow::Error;84	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {85		match s {86			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),87			"fleet" => Ok(Self::Fleet),88			"nixos-install" => Ok(Self::NixosInstall),89			"nixos-lustrate" => Ok(Self::NixosLustrate),90			v => bail!(91				"unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""92			),93		}94	}95}96pub struct ConfigHost {97	config: Config,98	pub name: String,99	groups: OnceCell<Vec<String>>,100101	// TODO: Both of those values are taken from host opts, there should be a cleaner way to specify it102	deploy_kind: OnceCell<DeployKind>,103	session_destination: OnceCell<String>,104105	pub host_config: Option<Value>,106	pub nixos_config: OnceCell<Value>,107	pub nixos_unchecked_config: OnceCell<Value>,108	pub pkgs_override: Option<Value>,109110	// TODO: Move command helpers away with connectivity refactor111	pub local: bool,112	pub session: OnceLock<Arc<openssh::Session>>,113}114115#[derive(Debug, Clone, Copy)]116pub enum GenerationStorage {117	Deployer,118	Machine,119	Pusher,120}121impl GenerationStorage {122	fn prefix(&self) -> &'static str {123		match self {124			GenerationStorage::Deployer => "deployer.",125			GenerationStorage::Machine => "",126			GenerationStorage::Pusher => "pusher.",127		}128	}129}130131#[derive(Tabled, Debug)]132pub struct Generation {133	#[tabled(rename = "ID", format("{}", self.rollback_id()))]134	pub id: u32,135	#[tabled(rename = "Current")]136	pub current: bool,137	#[tabled(rename = "Created at")]138	pub datetime: UtcDateTime,139	#[tabled(format = "{:?}")]140	pub store_path: PathBuf,141	#[tabled(skip)]142	pub location: GenerationStorage,143}144impl Generation {145	pub fn rollback_id(&self) -> String {146		format!("{}{}", self.location.prefix(), self.id)147	}148}149150fn parse_generation_line(g: &str) -> Option<Generation> {151	let mut parts = g.split_whitespace();152	let id = parts.next()?;153	let id: u32 = id.parse().ok()?;154	let date = parts.next()?;155	let time = parts.next()?;156	let current = if let Some(current) = parts.next() {157		if current == "(current)" {158			Some(true)159		} else {160			None161		}162	} else {163		Some(false)164	};165	let current = current?;166	if parts.next().is_some() {167		warn!("unexpected text after generation: {g}");168	}169170	let format = format_description::parse("[year]-[month]-[day] [hour]:[minute]:[second]")171		.expect("valid format");172	let datetime = UtcDateTime::parse(&format!("{date} {time}"), &format).ok()?;173174	Some(Generation {175		id,176		current,177		datetime,178		store_path: PathBuf::new(),179		location: GenerationStorage::Machine,180	})181}182// TODO: Move command helpers away with connectivity refactor183impl ConfigHost {184	pub async fn list_generations(&self, profile: &str) -> Result<Vec<Generation>> {185		let mut cmd = self.cmd("nix-env").await?;186		cmd.comparg("--profile", format!("/nix/var/nix/profiles/{profile}"))187			.arg("--list-generations")188			.env("TZ", "UTC");189		// Sudo is required because --list-generations tries to acquire profile lock190		let data = cmd.sudo().run_string().await?;191		let mut generations = data192			.split('\n')193			.map(|e| e.trim())194			.filter(|&l| !l.is_empty())195			.filter_map(|g| {196				let generation = parse_generation_line(g);197				if generation.is_none() {198					warn!("bad generation: {g}");199				};200				generation201			})202			.collect::<Vec<_>>();203		for ele in generations.iter_mut() {204			let mut cmd = self.cmd("readlink").await?;205			cmd.arg("--")206				.arg(format!("/nix/var/nix/profiles/{profile}-{}-link", ele.id));207			let path = cmd.run_string().await?;208			ele.store_path = PathBuf::from(path.trim_end_matches("\n"));209		}210211		Ok(generations)212	}213214	pub fn set_session_destination(&self, dest: String) {215		self.session_destination216			.set(dest)217			.expect("session destination is already set")218	}219	pub fn set_deploy_kind(&self, kind: DeployKind) {220		self.deploy_kind221			.set(kind)222			.expect("deploy kind is already set");223	}224	pub async fn deploy_kind(&self) -> Result<DeployKind> {225		if let Some(kind) = self.deploy_kind.get() {226			return Ok(*kind);227		}228		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {229			Ok(v) => v,230			Err(e) => {231				bail!("failed to query remote system kind: {}", e);232			}233		};234		if !is_fleet_managed {235			bail!(indoc::indoc! {"236				host is not marked as managed by fleet237				if you're not trying to lustrate/install system from scratch,238				you should either239					1. manually create /etc/FLEET_HOST file on the target host,240					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet241					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos242			"});243		}244		// TOCTOU is possible245		let _ = self.deploy_kind.set(DeployKind::Fleet);246		Ok(*self.deploy_kind.get().expect("deploy kind is just set"))247	}248	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {249		// Prefer sudo, as run0 has some gotchas with polkit250		// and too many repeating prompts.251		if (self.find_in_path("sudo").await).is_ok() {252			return Ok(EscalationStrategy::Sudo);253		}254		if (self.find_in_path("run0").await).is_ok() {255			return Ok(EscalationStrategy::Run0);256		}257		Ok(EscalationStrategy::Su)258	}259	async fn open_session(&self) -> Result<Arc<openssh::Session>> {260		assert!(!self.local, "do not open ssh connection to local session");261		// FIXME: TOCTOU262		if let Some(session) = &self.session.get() {263			return Ok((*session).clone());264		};265		let session = SessionBuilder::default();266267		let dest = self.session_destination.get().unwrap_or(&self.name);268		let session = session269			.connect(&dest)270			.await271			.map_err(|e| anyhow!("ssh error while connecting to {}: {e:#?}", self.name))?;272		let session = Arc::new(session);273		self.session.set(session.clone()).expect("TOCTOU happened");274		Ok(session)275	}276	pub async fn mktemp_dir(&self) -> Result<String> {277		let mut cmd = self.cmd("mktemp").await?;278		cmd.arg("-d");279		let path = cmd.run_string().await?;280		Ok(path.trim_end().to_owned())281	}282	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {283		let mut cmd = self.cmd("sh").await?;284		cmd.arg("-c")285			.arg("test -e \"$1\" && echo true || echo false")286			.arg("_")287			.arg(path);288		cmd.run_value().await289	}290	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {291		let mut cmd = self.cmd("cat").await?;292		cmd.arg(path);293		cmd.run_bytes().await294	}295	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {296		let mut cmd = self.cmd("cat").await?;297		cmd.arg(path);298		cmd.run_string().await299	}300	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {301		let mut cmd = self.cmd("ls").await?;302		cmd.arg(path);303		let out = cmd.run_string().await?;304		let mut lines = out.split('\n');305		if let Some(last) = lines.next_back() {306			ensure!(last.is_empty(), "output of ls should end with newline");307		}308		Ok(lines.map(ToOwned::to_owned).collect())309	}310	#[allow(dead_code)]311	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {312		let text = self.read_file_text(path).await?;313		Ok(serde_json::from_str(&text)?)314	}315	pub async fn read_env(&self, env: &str) -> Result<String> {316		let mut cmd = self.cmd("printenv").await?;317		cmd.arg(env);318		cmd.run_string().await319	}320	pub async fn find_in_path(&self, command: &str) -> Result<String> {321		// // `which` is not a part of coreutils, and it might not exist on machine.322		// let path = self.read_env("PATH").await?;323		// // Assuming delimiter is :, we don't work with windows host, this check will be much324		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)325		// for ele in path.split(':') {326		// 	let test_path = format!("{ele}/{cmd}");327		// 	test -x etc328		// }329		// let mut cmd = self.cmd("printenv").await?;330		// cmd.arg(env);331		// Ok(cmd.run_string().await?)332		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.333		let mut cmd = self334			.cmd_escalation(335				// Not used336				EscalationStrategy::Su,337				"which",338			)339			.await?;340		cmd.arg(command);341		cmd.run_string().await342	}343	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>344	where345		<D as FromStr>::Err: Display,346	{347		let text = self.read_file_text(path).await?;348		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))349	}350	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {351		self.cmd_escalation(self.escalation_strategy().await?, cmd)352			.await353	}354	pub async fn cmd_escalation(355		&self,356		escalation: EscalationStrategy,357		cmd: impl AsRef<OsStr>,358	) -> Result<MyCommand> {359		if self.local {360			Ok(MyCommand::new(escalation, cmd))361		} else {362			let session = self.open_session().await?;363			Ok(MyCommand::new_on(escalation, cmd, session))364		}365	}366	pub async fn nix_cmd(&self) -> Result<MyCommand> {367		let mut nix = self.cmd("nix").await?;368		nix.args([369			"--extra-experimental-features",370			"nix-command",371			"--extra-experimental-features",372			"flakes",373		]);374		Ok(nix)375	}376377	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {378		ensure!(data.encrypted, "secret is not encrypted");379		let mut cmd = self.cmd("fleet-install-secrets").await?;380		cmd.arg("decrypt").eqarg("--secret", data.to_string());381		let encoded = cmd382			.sudo()383			.run_string()384			.await385			.context("failed to call remote host for decrypt")?;386		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;387		ensure!(!data.encrypted, "secret came out encrypted");388		Ok(data.data)389	}390	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {391		ensure!(data.encrypted, "secret is not encrypted");392		let mut cmd = self.cmd("fleet-install-secrets").await?;393		cmd.arg("reencrypt").eqarg("--secret", data.to_string());394		for target in targets {395			let key = self.config.key(&target).await?;396			cmd.eqarg("--targets", key);397		}398		let encoded = cmd399			.sudo()400			.run_string()401			.await402			.context("failed to call remote host for decrypt")?;403		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;404		ensure!(data.encrypted, "secret came out not encrypted");405		Ok(data)406	}407	/// Returns path for futureproofing, as path might change i.e on conversion to CA408	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {409		if self.local {410			// Path is located locally, thus already trusted.411			return Ok(path.to_owned());412		}413		let mut nix = MyCommand::new(414			// Not used415			EscalationStrategy::Su,416			"nix",417		);418		nix.arg("copy").arg("--substitute-on-destination");419420		match self.deploy_kind().await? {421			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {422				nix.comparg("--to", format!("ssh-ng://{}", self.name));423			}424			DeployKind::NixosInstall => {425				nix426					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon427					.arg("--no-check-sigs")428					.comparg(429						"--to",430						format!("ssh-ng://root@{}?remote-store=/mnt", self.name),431					);432			}433		}434		nix.arg(path);435		nix.run_nix().await.context("nix copy")?;436		Ok(path.to_owned())437	}438	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {439		let mut cmd = self.cmd("systemctl").await?;440		cmd.arg("stop").arg(name);441		cmd.sudo().run().await442	}443	pub async fn systemctl_start(&self, name: &str) -> Result<()> {444		let mut cmd = self.cmd("systemctl").await?;445		cmd.arg("start").arg(name);446		cmd.sudo().run().await447	}448449	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {450		let mut cmd = self.cmd("rm").await?;451		cmd.arg("-f").arg(path);452		if sudo {453			cmd = cmd.sudo()454		}455		cmd.run().await456	}457}458impl ConfigHost {459	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,460	// assuming getting tags always returns the same value.461	pub async fn tags(&self) -> Result<Vec<String>> {462		if let Some(v) = self.groups.get() {463			return Ok(v.clone());464		}465		let Some(host_config) = &self.host_config else {466			return Ok(vec![]);467		};468		let tags: Vec<String> = nix_go_json!(host_config.tags);469470		let _ = self.groups.set(tags.clone());471472		Ok(tags)473	}474	pub async fn nixos_config(&self) -> Result<Value> {475		if let Some(v) = self.nixos_config.get() {476			return Ok(v.clone());477		}478		let Some(host_config) = &self.host_config else {479			bail!("local host has no nixos_config");480		};481		let nixos_config = nix_go!(host_config.nixos.config);482		assert_warn("nixos config evaluation", &nixos_config).await?;483484		let _ = self.nixos_config.set(nixos_config.clone());485486		Ok(nixos_config)487	}488	pub async fn nixos_unchecked_config(&self) -> Result<Value> {489		if let Some(v) = self.nixos_unchecked_config.get() {490			return Ok(v.clone());491		}492		let Some(host_config) = &self.host_config else {493			bail!("local host has no nixos_config");494		};495		let nixos_config = nix_go!(host_config.nixos_unchecked.config);496497		let _ = self.nixos_unchecked_config.set(nixos_config.clone());498499		Ok(nixos_config)500	}501502	pub async fn list_configured_secrets(&self) -> Result<Vec<String>> {503		let nixos = self.nixos_unchecked_config().await?;504		let secrets = nix_go!(nixos.secrets);505		let mut out = Vec::new();506		for name in secrets.list_fields().await? {507			let secret = nix_go!(secrets[{ name }]);508			let is_shared: bool = nix_go_json!(secret.shared);509			if is_shared {510				continue;511			}512			out.push(name);513		}514		Ok(out)515	}516	pub async fn secret_field(&self, name: &str) -> Result<Value> {517		let nixos = self.nixos_unchecked_config().await?;518		Ok(nix_go!(nixos.secrets[{ name }]))519	}520521	/// Packages for this host, resolved with nixpkgs overlays522	pub async fn pkgs(&self) -> Result<Value> {523		if let Some(value) = &self.pkgs_override {524			return Ok(value.clone());525		}526		let Some(host_config) = &self.host_config else {527			bail!("local host has no host_config");528		};529		// TODO: Should nixos.options be cached?530		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))531	}532}533534impl Config {535	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {536		let config = &self.config_field;537		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);538		Ok(tagged)539	}540	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {541		let mut out = BTreeSet::new();542		for owner in owners {543			if let Some(tag) = owner.strip_prefix('@') {544				let hosts = self.tagged_hostnames(tag).await?;545				out.extend(hosts);546			} else {547				out.insert(owner);548			}549		}550		Ok(out)551	}552	pub fn local_host(&self) -> ConfigHost {553		ConfigHost {554			config: self.clone(),555			name: "<virtual localhost>".to_owned(),556			host_config: None,557			nixos_config: OnceCell::new(),558			nixos_unchecked_config: OnceCell::new(),559			groups: {560				let cell = OnceCell::new();561				let _ = cell.set(vec![]);562				cell563			},564			pkgs_override: Some(self.default_pkgs.clone()),565566			local: true,567			session: OnceLock::new(),568			deploy_kind: OnceCell::new(),569			session_destination: OnceCell::new(),570		}571	}572573	pub async fn host(&self, name: &str) -> Result<ConfigHost> {574		let config = &self.config_field;575		let host_config = nix_go!(config.hosts[{ name }]);576577		Ok(ConfigHost {578			config: self.clone(),579			name: name.to_owned(),580			host_config: Some(host_config),581			nixos_config: OnceCell::new(),582			nixos_unchecked_config: OnceCell::new(),583			groups: OnceCell::new(),584			pkgs_override: None,585586			// TODO: Remove with connectivit refactor587			local: self.localhost == name,588			session: OnceLock::new(),589			deploy_kind: OnceCell::new(),590			session_destination: OnceCell::new(),591		})592	}593	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {594		let config = &self.config_field;595		let names = nix_go!(config.hosts).list_fields().await?;596		let mut out = vec![];597		for name in names {598			out.push(self.host(&name).await?);599		}600		Ok(out)601	}602	// TODO: Replace usages with .host().nixos_config603	pub async fn system_config(&self, host: &str) -> Result<Value> {604		let fleet_field = &self.config_field;605		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))606	}607608	/// Shared secrets configured in fleet.nix or in flake609	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {610		let config_field = &self.config_field;611		Ok(nix_go!(config_field.sharedSecrets).list_fields().await?)612	}613	/// Shared secrets configured in fleet.nix614	pub fn list_shared(&self) -> Vec<String> {615		let data = self.data();616		data.shared_secrets.keys().cloned().collect()617	}618	pub fn has_shared(&self, name: &str) -> bool {619		let data = self.data();620		data.shared_secrets.contains_key(name)621	}622	pub fn replace_shared(&self, name: String, shared: FleetSharedSecret) {623		let mut data = self.data_mut();624		data.shared_secrets.insert(name.to_owned(), shared);625	}626	pub fn remove_shared(&self, secret: &str) {627		let mut data = self.data_mut();628		data.shared_secrets.remove(secret);629	}630631	pub fn list_secrets(&self, host: &str) -> Vec<String> {632		let data = self.data();633		let Some(secrets) = data.host_secrets.get(host) else {634			return Vec::new();635		};636		secrets.keys().cloned().collect()637	}638639	pub fn has_secret(&self, host: &str, secret: &str) -> bool {640		let data = self.data();641		let Some(host_secrets) = data.host_secrets.get(host) else {642			return false;643		};644		host_secrets.contains_key(secret)645	}646	pub fn insert_secret(&self, host: &str, secret: String, value: FleetSecret) {647		let mut data = self.data_mut();648		let host_secrets = data.host_secrets.entry(host.to_owned()).or_default();649		host_secrets.insert(secret, value);650	}651652	pub fn host_secret(&self, host: &str, secret: &str) -> Result<FleetSecret> {653		let data = self.data();654		let Some(host_secrets) = data.host_secrets.get(host) else {655			bail!("no secrets for machine {host}");656		};657		let Some(secret) = host_secrets.get(secret) else {658			bail!("machine {host} has no secret {secret}");659		};660		Ok(secret.clone())661	}662	pub fn shared_secret(&self, secret: &str) -> Result<FleetSharedSecret> {663		let data = self.data();664		let Some(secret) = data.shared_secrets.get(secret) else {665			bail!("no shared secret {secret}");666		};667		Ok(secret.clone())668	}669	pub async fn shared_secret_expected_owners(&self, secret: &str) -> Result<Vec<String>> {670		let config_field = &self.config_field;671		Ok(nix_go_json!(672			config_field.sharedSecrets[{ secret }].expectedOwners673		))674	}675676	// TODO: Should this be something modifiable from other processes?677	// E.g terraform provider might want to update FleetData (e.g secrets),678	// and current implementation assumes only one process holds current fleet.nix679	// Given that it is no longer needs to be a file for nix evaluation,680	// maybe it can be a .nix file for persistence, but accessible only681	// thru some shared state controller? Might it be stored in terraform682	// state provider?683	pub fn data(&self) -> MutexGuard<FleetData> {684		self.data.lock().unwrap()685	}686	pub fn data_mut(&self) -> MutexGuard<FleetData> {687		self.data.lock().unwrap()688	}689	pub fn save(&self) -> Result<()> {690		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;691		let data = nixlike::serialize(&self.data() as &FleetData)?;692		tempfile.write_all(693			format!(694				"# This file contains fleet state and shouldn't be edited by hand\n\n{}\n\n# vim: ts=2 et nowrap\n",695				data696			)697			.as_bytes(),698		)?;699		let mut fleet_data_path = self.directory.clone();700		fleet_data_path.push("fleet.nix");701		tempfile.persist(fleet_data_path)?;702		Ok(())703	}704}