git.delta.rocks / jrsonnet / refs/commits / 45c49ea21363

difftreelog

source

crates/fleet-base/src/host.rs20.7 KiBsourcehistory
1use std::{2	cell::OnceCell,3	collections::BTreeSet,4	ffi::{OsStr, OsString},5	fmt::Display,6	io::Write,7	ops::Deref,8	path::PathBuf,9	str::FromStr,10	sync::{Arc, Mutex, MutexGuard, OnceLock},11};1213use anyhow::{Context, Result, anyhow, bail, ensure};14use fleet_shared::SecretData;15use nix_eval::{Value, nix_go, nix_go_json, util::assert_warn};16use openssh::{ControlPersist, SessionBuilder};17use serde::de::DeserializeOwned;18use tabled::Tabled;19use tempfile::NamedTempFile;20use time::{UtcDateTime, format_description};21use tracing::warn;2223use crate::{24	command::MyCommand,25	fleetdata::{FleetData, FleetSecretData, FleetSecretDistribution, FleetSecretDistributions},26};2728pub struct FleetConfigInternals {29	/// Fleet project directory, containing fleet.nix file.30	pub directory: PathBuf,31	/// builtins.currentSystem32	pub local_system: String,33	pub data: Arc<Mutex<FleetData>>,34	pub nix_args: Vec<OsString>,35	/// fleet_config.config36	pub config_field: Value,37	/// flake.output38	pub flake_outputs: Value,39	// TODO: Remove with connectivity refactor40	pub localhost: String,4142	/// import nixpkgs {system = local};43	pub default_pkgs: Value,44	/// inputs.nixpkgs45	pub nixpkgs: Value,46}4748// TODO: Make field not pub49#[derive(Clone)]50pub struct Config(pub Arc<FleetConfigInternals>);5152impl Deref for Config {53	type Target = FleetConfigInternals;5455	fn deref(&self) -> &Self::Target {56		&self.057	}58}5960#[derive(Clone, Copy, Debug)]61pub enum EscalationStrategy {62	Sudo,63	Run0,64	Su,65}6667#[derive(Clone, PartialEq, Copy, Debug)]68pub enum DeployKind {69	/// NixOS => NixOS managed by fleet70	UpgradeToFleet,71	/// NixOS managed by fleet => NixOS managed by fleet72	Fleet,73	/// Remote host has /mnt, /mnt/boot mounted,74	/// generated config is added to fleet configuration.75	NixosInstall,76	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),77	/// generated config is added to fleet configuration,78	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.79	NixosLustrate,80}8182impl FromStr for DeployKind {83	type Err = anyhow::Error;84	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {85		match s {86			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),87			"fleet" => Ok(Self::Fleet),88			"nixos-install" => Ok(Self::NixosInstall),89			"nixos-lustrate" => Ok(Self::NixosLustrate),90			v => bail!(91				"unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""92			),93		}94	}95}96pub struct ConfigHost {97	config: Config,98	pub name: String,99	groups: OnceCell<Vec<String>>,100101	// TODO: Both of those values are taken from host opts, there should be a cleaner way to specify it102	deploy_kind: OnceCell<DeployKind>,103	session_destination: OnceCell<String>,104	legacy_ssh_store: OnceCell<bool>,105106	pub host_config: Option<Value>,107	pub nixos_config: OnceCell<Value>,108	pub nixos_unchecked_config: OnceCell<Value>,109	pub pkgs_override: Option<Value>,110111	// TODO: Move command helpers away with connectivity refactor112	pub local: bool,113	pub session: OnceLock<Arc<openssh::Session>>,114}115116#[derive(Debug, Clone, Copy)]117pub enum GenerationStorage {118	Deployer,119	Machine,120	Pusher,121}122impl GenerationStorage {123	fn prefix(&self) -> &'static str {124		match self {125			GenerationStorage::Deployer => "deployer.",126			GenerationStorage::Machine => "",127			GenerationStorage::Pusher => "pusher.",128		}129	}130}131132#[derive(Tabled, Debug)]133pub struct Generation {134	#[tabled(rename = "ID", format("{}", self.rollback_id()))]135	pub id: u32,136	#[tabled(rename = "Current")]137	pub current: bool,138	#[tabled(rename = "Created at")]139	pub datetime: UtcDateTime,140	#[tabled(format = "{:?}")]141	pub store_path: PathBuf,142	#[tabled(skip)]143	pub location: GenerationStorage,144}145impl Generation {146	pub fn rollback_id(&self) -> String {147		format!("{}{}", self.location.prefix(), self.id)148	}149}150151fn parse_generation_line(g: &str) -> Option<Generation> {152	let mut parts = g.split_whitespace();153	let id = parts.next()?;154	let id: u32 = id.parse().ok()?;155	let date = parts.next()?;156	let time = parts.next()?;157	let current = if let Some(current) = parts.next() {158		if current == "(current)" {159			Some(true)160		} else {161			None162		}163	} else {164		Some(false)165	};166	let current = current?;167	if parts.next().is_some() {168		warn!("unexpected text after generation: {g}");169	}170171	let format = format_description::parse("[year]-[month]-[day] [hour]:[minute]:[second]")172		.expect("valid format");173	let datetime = UtcDateTime::parse(&format!("{date} {time}"), &format).ok()?;174175	Some(Generation {176		id,177		current,178		datetime,179		store_path: PathBuf::new(),180		location: GenerationStorage::Machine,181	})182}183// TODO: Move command helpers away with connectivity refactor184impl ConfigHost {185	pub async fn list_generations(&self, profile: &str) -> Result<Vec<Generation>> {186		let mut cmd = self.cmd("nix-env").await?;187		cmd.comparg("--profile", format!("/nix/var/nix/profiles/{profile}"))188			.arg("--list-generations")189			.env("TZ", "UTC");190		// Sudo is required because --list-generations tries to acquire profile lock191		let data = cmd.sudo().run_string().await?;192		let mut generations = data193			.split('\n')194			.map(|e| e.trim())195			.filter(|&l| !l.is_empty())196			.filter_map(|g| {197				let generation = parse_generation_line(g);198				if generation.is_none() {199					warn!("bad generation: {g}");200				};201				generation202			})203			.collect::<Vec<_>>();204		for ele in generations.iter_mut() {205			let mut cmd = self.cmd("readlink").await?;206			cmd.arg("--")207				.arg(format!("/nix/var/nix/profiles/{profile}-{}-link", ele.id));208			let path = cmd.run_string().await?;209			ele.store_path = PathBuf::from(path.trim_end_matches("\n"));210		}211212		Ok(generations)213	}214215	pub fn set_session_destination(&self, dest: String) {216		self.session_destination217			.set(dest)218			.expect("session destination is already set")219	}220	pub fn set_deploy_kind(&self, kind: DeployKind) {221		self.deploy_kind222			.set(kind)223			.expect("deploy kind is already set");224	}225	pub fn set_legacy_ssh_store(&self, legacy: bool) {226		self.legacy_ssh_store227			.set(legacy)228			.expect("legacy ssh store is already set")229	}230	pub async fn deploy_kind(&self) -> Result<DeployKind> {231		if let Some(kind) = self.deploy_kind.get() {232			return Ok(*kind);233		}234		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {235			Ok(v) => v,236			Err(e) => {237				bail!("failed to query remote system kind: {e}");238			}239		};240		if !is_fleet_managed {241			bail!(242				"{}",243				indoc::indoc! {"244				host is not marked as managed by fleet245				if you're not trying to lustrate/install system from scratch,246				you should either247					1. manually create /etc/FLEET_HOST file on the target host,248					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet249					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos250			"}251			);252		}253		// TOCTOU is possible254		let _ = self.deploy_kind.set(DeployKind::Fleet);255		Ok(*self.deploy_kind.get().expect("deploy kind is just set"))256	}257	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {258		// Prefer sudo, as run0 has some gotchas with polkit259		// and too many repeating prompts.260		if (self.find_in_path("sudo").await).is_ok() {261			return Ok(EscalationStrategy::Sudo);262		}263		if (self.find_in_path("run0").await).is_ok() {264			return Ok(EscalationStrategy::Run0);265		}266		Ok(EscalationStrategy::Su)267	}268	async fn open_session(&self) -> Result<Arc<openssh::Session>> {269		assert!(!self.local, "do not open ssh connection to local session");270		// FIXME: TOCTOU271		if let Some(session) = &self.session.get() {272			return Ok((*session).clone());273		};274		let mut session = SessionBuilder::default();275		session.control_persist(ControlPersist::ClosedAfterInitialConnection);276277		let dest = self.session_destination.get().unwrap_or(&self.name);278		let session = session279			.connect(&dest)280			.await281			.map_err(|e| anyhow!("ssh error while connecting to {}: {e:#?}", self.name))?;282		let session = Arc::new(session);283		self.session.set(session.clone()).expect("TOCTOU happened");284		Ok(session)285	}286	pub async fn mktemp_dir(&self) -> Result<String> {287		let mut cmd = self.cmd("mktemp").await?;288		cmd.arg("-d");289		let path = cmd.run_string().await?;290		Ok(path.trim_end().to_owned())291	}292	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {293		let mut cmd = self.cmd("sh").await?;294		cmd.arg("-c")295			.arg("test -e \"$1\" && echo true || echo false")296			.arg("_")297			.arg(path);298		cmd.run_value().await299	}300	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {301		let mut cmd = self.cmd("cat").await?;302		cmd.arg(path);303		cmd.run_bytes().await304	}305	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {306		let mut cmd = self.cmd("cat").await?;307		cmd.arg(path);308		cmd.run_string().await309	}310	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {311		let mut cmd = self.cmd("ls").await?;312		cmd.arg(path);313		let out = cmd.run_string().await?;314		let mut lines = out.split('\n');315		if let Some(last) = lines.next_back() {316			ensure!(last.is_empty(), "output of ls should end with newline");317		}318		Ok(lines.map(ToOwned::to_owned).collect())319	}320	#[allow(dead_code)]321	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {322		let text = self.read_file_text(path).await?;323		Ok(serde_json::from_str(&text)?)324	}325	pub async fn read_env(&self, env: &str) -> Result<String> {326		let mut cmd = self.cmd("printenv").await?;327		cmd.arg(env);328		cmd.run_string().await329	}330	pub async fn find_in_path(&self, command: &str) -> Result<String> {331		// // `which` is not a part of coreutils, and it might not exist on machine.332		// let path = self.read_env("PATH").await?;333		// // Assuming delimiter is :, we don't work with windows host, this check will be much334		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)335		// for ele in path.split(':') {336		// 	let test_path = format!("{ele}/{cmd}");337		// 	test -x etc338		// }339		// let mut cmd = self.cmd("printenv").await?;340		// cmd.arg(env);341		// Ok(cmd.run_string().await?)342		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.343		let mut cmd = self344			.cmd_escalation(345				// Not used346				EscalationStrategy::Su,347				"which",348			)349			.await?;350		cmd.arg(command);351		cmd.run_string().await352	}353	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>354	where355		<D as FromStr>::Err: Display,356	{357		let text = self.read_file_text(path).await?;358		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))359	}360	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {361		self.cmd_escalation(self.escalation_strategy().await?, cmd)362			.await363	}364	pub async fn cmd_escalation(365		&self,366		escalation: EscalationStrategy,367		cmd: impl AsRef<OsStr>,368	) -> Result<MyCommand> {369		if self.local {370			Ok(MyCommand::new(escalation, cmd))371		} else {372			let session = self.open_session().await?;373			Ok(MyCommand::new_on(escalation, cmd, session))374		}375	}376	pub async fn nix_cmd(&self) -> Result<MyCommand> {377		let mut nix = self.cmd("nix").await?;378		nix.args([379			"--extra-experimental-features",380			"nix-command",381			"--extra-experimental-features",382			"flakes",383		]);384		Ok(nix)385	}386387	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {388		ensure!(data.encrypted, "secret is not encrypted");389		let mut cmd = self.cmd("fleet-install-secrets").await?;390		cmd.arg("decrypt").eqarg("--secret", data.to_string());391		let encoded = cmd392			.sudo()393			.run_string()394			.await395			.context("failed to call remote host for decrypt")?;396		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;397		ensure!(!data.encrypted, "secret came out encrypted");398		Ok(data.data)399	}400	pub async fn reencrypt(&self, data: SecretData, targets: Vec<String>) -> Result<SecretData> {401		ensure!(data.encrypted, "secret is not encrypted");402		let mut cmd = self.cmd("fleet-install-secrets").await?;403		cmd.arg("reencrypt").eqarg("--secret", data.to_string());404		for target in targets {405			let key = self.config.key(&target).await?;406			cmd.eqarg("--targets", key);407		}408		let encoded = cmd409			.sudo()410			.run_string()411			.await412			.context("failed to call remote host for decrypt")?;413		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;414		ensure!(data.encrypted, "secret came out not encrypted");415		Ok(data)416	}417	/// Returns path for futureproofing, as path might change i.e on conversion to CA418	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {419		if self.local {420			// Path is located locally, thus already trusted.421			return Ok(path.to_owned());422		}423		let mut nix = MyCommand::new(424			// Not used425			EscalationStrategy::Su,426			"nix",427		);428		nix.arg("copy").arg("--substitute-on-destination");429430		let proto = if self.legacy_ssh_store.get().cloned().unwrap_or(false) {431			"ssh"432		} else {433			"ssh-ng"434		};435436		match self.deploy_kind().await? {437			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {438				nix.comparg("--to", format!("{proto}://{}", self.name));439			}440			DeployKind::NixosInstall => {441				nix442					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon443					.arg("--no-check-sigs")444					.comparg(445						"--to",446						format!("{proto}://root@{}?remote-store=/mnt", self.name),447					);448			}449		}450		nix.arg(path);451		nix.run_nix().await.context("nix copy")?;452		Ok(path.to_owned())453	}454	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {455		let mut cmd = self.cmd("systemctl").await?;456		cmd.arg("stop").arg(name);457		cmd.sudo().run().await458	}459	pub async fn systemctl_start(&self, name: &str) -> Result<()> {460		let mut cmd = self.cmd("systemctl").await?;461		cmd.arg("start").arg(name);462		cmd.sudo().run().await463	}464465	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {466		let mut cmd = self.cmd("rm").await?;467		cmd.arg("-f").arg(path);468		if sudo {469			cmd = cmd.sudo()470		}471		cmd.run().await472	}473}474impl ConfigHost {475	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,476	// assuming getting tags always returns the same value.477	pub async fn tags(&self) -> Result<Vec<String>> {478		if let Some(v) = self.groups.get() {479			return Ok(v.clone());480		}481		let Some(host_config) = &self.host_config else {482			return Ok(vec![]);483		};484		let tags: Vec<String> = nix_go_json!(host_config.tags);485486		let _ = self.groups.set(tags.clone());487488		Ok(tags)489	}490	pub async fn nixos_config(&self) -> Result<Value> {491		if let Some(v) = self.nixos_config.get() {492			return Ok(v.clone());493		}494		let Some(host_config) = &self.host_config else {495			bail!("local host has no nixos_config");496		};497		let nixos_config = nix_go!(host_config.nixos.config);498		assert_warn("nixos config evaluation", &nixos_config).await?;499500		let _ = self.nixos_config.set(nixos_config.clone());501502		Ok(nixos_config)503	}504	pub fn nixos_unchecked_config(&self) -> Result<Value> {505		if let Some(v) = self.nixos_unchecked_config.get() {506			return Ok(v.clone());507		}508		let Some(host_config) = &self.host_config else {509			bail!("local host has no nixos_config");510		};511		let nixos_config = nix_go!(host_config.nixos_unchecked.config);512513		let _ = self.nixos_unchecked_config.set(nixos_config.clone());514515		Ok(nixos_config)516	}517518	pub fn list_defined_secrets(&self) -> Result<Vec<String>> {519		let nixos = self.nixos_unchecked_config()?;520		let secrets = nix_go!(nixos.secrets);521		secrets.list_fields()522	}523524	/// Packages for this host, resolved with nixpkgs overlays525	pub async fn pkgs(&self) -> Result<Value> {526		if let Some(value) = &self.pkgs_override {527			return Ok(value.clone());528		}529		let Some(host_config) = &self.host_config else {530			bail!("local host has no host_config");531		};532		// TODO: Should nixos.options be cached?533		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))534	}535}536537impl Config {538	pub async fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {539		let config = &self.config_field;540		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);541		Ok(tagged)542	}543	pub async fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {544		let mut out = BTreeSet::new();545		for owner in owners {546			if let Some(tag) = owner.strip_prefix('@') {547				let hosts = self.tagged_hostnames(tag).await?;548				out.extend(hosts);549			} else {550				out.insert(owner);551			}552		}553		Ok(out)554	}555	pub fn local_host(&self) -> ConfigHost {556		ConfigHost {557			config: self.clone(),558			name: "<virtual localhost>".to_owned(),559			host_config: None,560			nixos_config: OnceCell::new(),561			nixos_unchecked_config: OnceCell::new(),562			groups: {563				let cell = OnceCell::new();564				let _ = cell.set(vec![]);565				cell566			},567			pkgs_override: Some(self.default_pkgs.clone()),568569			local: true,570			session: OnceLock::new(),571			deploy_kind: OnceCell::new(),572			session_destination: OnceCell::new(),573			legacy_ssh_store: OnceCell::new(),574		}575	}576577	pub async fn host(&self, name: &str) -> Result<ConfigHost> {578		let config = &self.config_field;579		let host_config = nix_go!(config.hosts[{ name }]);580581		Ok(ConfigHost {582			config: self.clone(),583			name: name.to_owned(),584			host_config: Some(host_config),585			nixos_config: OnceCell::new(),586			nixos_unchecked_config: OnceCell::new(),587			groups: OnceCell::new(),588			pkgs_override: None,589590			// TODO: Remove with connectivit refactor591			local: self.localhost == name,592			session: OnceLock::new(),593			deploy_kind: OnceCell::new(),594			session_destination: OnceCell::new(),595			legacy_ssh_store: OnceCell::new(),596		})597	}598	pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {599		let config = &self.config_field;600		let names = nix_go!(config.hosts).list_fields()?;601		let mut out = vec![];602		for name in names {603			out.push(self.host(&name).await?);604		}605		Ok(out)606	}607	// TODO: Replace usages with .host().nixos_config608	pub async fn system_config(&self, host: &str) -> Result<Value> {609		let fleet_field = &self.config_field;610		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))611	}612613	/// Shared secrets configured in fleet.nix or in flake614	pub async fn list_configured_shared(&self) -> Result<Vec<String>> {615		let config_field = &self.config_field;616		nix_go!(config_field.sharedSecrets).list_fields()617	}618	pub fn has_shared(&self, name: &str) -> bool {619		let data = self.data();620		data.secrets.contains(name)621	}622	pub fn replace_shared(&self, name: String, shared: FleetSecretDistribution) {623		let mut data = self.data_mut();624		data.secrets.set_data(name, shared);625	}626	pub fn remove_shared(&self, secret: &str) {627		let mut data = self.data_mut();628		data.secrets.remove(secret);629	}630631	pub fn list_secrets_for_owner(&self, host: &str) -> Vec<String> {632		let data = self.data_mut();633		data.secrets.keys_for_owner(host).cloned().collect()634	}635	pub fn list_secrets(&self) -> Vec<String> {636		let data = self.data_mut();637		data.secrets.keys().cloned().collect()638	}639640	pub fn has_secret(&self, host: &str, secret: &str) -> bool {641		let data = self.data();642		data.secrets.contains_for_owner(secret, host)643	}644	pub fn insert_secret(&self, host: String, secret: String, value: FleetSecretData) {645		let mut data = self.data_mut();646		data.secrets.set_single_data(secret, host, value);647	}648	pub fn remove_secret(&self, host: &str, secret: &str) {649		let mut data = self.data_mut();650		data.secrets.drop_owner_no_reencrypt(secret, host);651	}652653	pub fn host_secret(&self, host: &str, secret: &str) -> Option<FleetSecretDistribution> {654		let data = self.data();655		data.secrets.get_single(secret, host).cloned()656	}657	pub fn shared_secret(&self, secret: &str) -> Option<FleetSecretDistributions> {658		let data = self.data();659		data.secrets.get(secret).cloned()660	}661662	// TODO: Should this be something modifiable from other processes?663	// E.g terraform provider might want to update FleetData (e.g secrets),664	// and current implementation assumes only one process holds current fleet.nix665	// Given that it is no longer needs to be a file for nix evaluation,666	// maybe it can be a .nix file for persistence, but accessible only667	// thru some shared state controller? Might it be stored in terraform668	// state provider?669	pub fn data(&'_ self) -> MutexGuard<'_, FleetData> {670		self.data.lock().unwrap()671	}672	pub fn data_mut(&'_ self) -> MutexGuard<'_, FleetData> {673		self.data.lock().unwrap()674	}675	pub fn save(&self) -> Result<()> {676		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;677		let data = nixlike::serialize(&self.data() as &FleetData)?;678		tempfile.write_all(679			format!(680				"# This file contains fleet state and shouldn't be edited by hand\n\n{data}\n\n# vim: ts=2 et nowrap\n"681			)682			.as_bytes(),683		)?;684		let mut fleet_data_path = self.directory.clone();685		fleet_data_path.push("fleet.nix");686		tempfile.persist(fleet_data_path)?;687		Ok(())688	}689}