git.delta.rocks / jrsonnet / refs/commits / 69498f520d8e

difftreelog

source

crates/fleet-base/src/host.rs20.2 KiBsourcehistory
1use std::{2	collections::{BTreeMap, BTreeSet},3	ffi::{OsStr, OsString},4	fmt::Display,5	io::Write,6	ops::Deref,7	path::PathBuf,8	str::FromStr,9	sync::{Arc, Mutex, MutexGuard, OnceLock},10};1112use anyhow::{Context, Result, anyhow, bail, ensure};13use chrono::{DateTime, Utc};14use fleet_shared::SecretData;15use nix_eval::{Value, nix_go, nix_go_json, util::assert_warn};16use openssh::{ControlPersist, SessionBuilder};17use serde::de::DeserializeOwned;18use tabled::Tabled;19use tempfile::NamedTempFile;20use time::{UtcDateTime, format_description};21use tracing::warn;2223use crate::{24	command::MyCommand,25	fleetdata::{26		FleetData, FleetSecretData, FleetSecretDistribution, FleetSecretPart, SecretOwner,27	},28};2930pub struct FleetConfigInternals {31	pub prefer_identities: BTreeSet<SecretOwner>,32	pub now: DateTime<Utc>,3334	/// Fleet project directory, containing fleet.nix file.35	pub directory: PathBuf,36	/// builtins.currentSystem37	pub local_system: String,38	pub data: Arc<FleetData>,39	pub nix_args: Vec<OsString>,40	/// fleet_config.config41	pub config_field: Value,42	/// flake.output43	pub flake_outputs: Value,44	// TODO: Remove with connectivity refactor45	pub localhost: String,4647	/// import nixpkgs {system = local};48	pub default_pkgs: Value,49	/// inputs.nixpkgs50	pub nixpkgs: Value,51}5253// TODO: Make field not pub54#[derive(Clone)]55pub struct Config(pub Arc<FleetConfigInternals>);5657impl Deref for Config {58	type Target = FleetConfigInternals;5960	fn deref(&self) -> &Self::Target {61		&self.062	}63}6465#[derive(Clone, Copy, Debug)]66pub enum EscalationStrategy {67	Sudo,68	Run0,69	Su,70}7172#[derive(Clone, PartialEq, Copy, Debug)]73pub enum DeployKind {74	/// NixOS => NixOS managed by fleet75	UpgradeToFleet,76	/// NixOS managed by fleet => NixOS managed by fleet77	Fleet,78	/// Remote host has /mnt, /mnt/boot mounted,79	/// generated config is added to fleet configuration.80	NixosInstall,81	/// Remote host has some system and nix installed in multi-user mode (/nix is owned by root),82	/// generated config is added to fleet configuration,83	/// and /etc/NIXOS_LUSTRATE exists, fleet will perform the rest.84	NixosLustrate,85}8687impl FromStr for DeployKind {88	type Err = anyhow::Error;89	fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {90		match s {91			"upgrade-to-fleet" => Ok(Self::UpgradeToFleet),92			"fleet" => Ok(Self::Fleet),93			"nixos-install" => Ok(Self::NixosInstall),94			"nixos-lustrate" => Ok(Self::NixosLustrate),95			v => bail!(96				"unknown deploy_kind: {v}; expected on of \"upgrade-to-fleet\", \"fleet\", \"nixos-install\", \"nixos-lustrate\""97			),98		}99	}100}101pub struct ConfigHost {102	config: Config,103	pub name: String,104	groups: OnceLock<Vec<String>>,105106	// TODO: Both of those values are taken from host opts, there should be a cleaner way to specify it107	deploy_kind: OnceLock<DeployKind>,108	session_destination: OnceLock<String>,109	legacy_ssh_store: OnceLock<bool>,110111	pub host_config: Option<Value>,112	pub nixos_config: OnceLock<Value>,113	pub nixos_unchecked_config: OnceLock<Value>,114	pub pkgs_override: Option<Value>,115116	// TODO: Move command helpers away with connectivity refactor117	pub local: bool,118	pub session: OnceLock<Arc<openssh::Session>>,119}120121#[derive(Debug, Clone, Copy)]122pub enum GenerationStorage {123	Deployer,124	Machine,125	Pusher,126}127impl GenerationStorage {128	fn prefix(&self) -> &'static str {129		match self {130			GenerationStorage::Deployer => "deployer.",131			GenerationStorage::Machine => "",132			GenerationStorage::Pusher => "pusher.",133		}134	}135}136137#[derive(Tabled, Debug)]138pub struct Generation {139	#[tabled(rename = "ID", format("{}", self.rollback_id()))]140	pub id: u32,141	#[tabled(rename = "Current")]142	pub current: bool,143	#[tabled(rename = "Created at")]144	pub datetime: UtcDateTime,145	#[tabled(format = "{:?}")]146	pub store_path: PathBuf,147	#[tabled(skip)]148	pub location: GenerationStorage,149}150impl Generation {151	pub fn rollback_id(&self) -> String {152		format!("{}{}", self.location.prefix(), self.id)153	}154}155156fn parse_generation_line(g: &str) -> Option<Generation> {157	let mut parts = g.split_whitespace();158	let id = parts.next()?;159	let id: u32 = id.parse().ok()?;160	let date = parts.next()?;161	let time = parts.next()?;162	let current = if let Some(current) = parts.next() {163		if current == "(current)" {164			Some(true)165		} else {166			None167		}168	} else {169		Some(false)170	};171	let current = current?;172	if parts.next().is_some() {173		warn!("unexpected text after generation: {g}");174	}175176	let format = format_description::parse("[year]-[month]-[day] [hour]:[minute]:[second]")177		.expect("valid format");178	let datetime = UtcDateTime::parse(&format!("{date} {time}"), &format).ok()?;179180	Some(Generation {181		id,182		current,183		datetime,184		store_path: PathBuf::new(),185		location: GenerationStorage::Machine,186	})187}188// TODO: Move command helpers away with connectivity refactor189impl ConfigHost {190	pub async fn list_generations(&self, profile: &str) -> Result<Vec<Generation>> {191		let mut cmd = self.cmd("nix-env").await?;192		cmd.comparg("--profile", format!("/nix/var/nix/profiles/{profile}"))193			.arg("--list-generations")194			.env("TZ", "UTC");195		// Sudo is required because --list-generations tries to acquire profile lock196		let data = cmd.sudo().run_string().await?;197		let mut generations = data198			.split('\n')199			.map(|e| e.trim())200			.filter(|&l| !l.is_empty())201			.filter_map(|g| {202				let generation = parse_generation_line(g);203				if generation.is_none() {204					warn!("bad generation: {g}");205				};206				generation207			})208			.collect::<Vec<_>>();209		for ele in generations.iter_mut() {210			let mut cmd = self.cmd("readlink").await?;211			cmd.arg("--")212				.arg(format!("/nix/var/nix/profiles/{profile}-{}-link", ele.id));213			let path = cmd.run_string().await?;214			ele.store_path = PathBuf::from(path.trim_end_matches("\n"));215		}216217		Ok(generations)218	}219220	pub fn set_session_destination(&self, dest: String) {221		self.session_destination222			.set(dest)223			.expect("session destination is already set")224	}225	pub fn set_deploy_kind(&self, kind: DeployKind) {226		self.deploy_kind227			.set(kind)228			.expect("deploy kind is already set");229	}230	pub fn set_legacy_ssh_store(&self, legacy: bool) {231		self.legacy_ssh_store232			.set(legacy)233			.expect("legacy ssh store is already set")234	}235	pub async fn deploy_kind(&self) -> Result<DeployKind> {236		if let Some(kind) = self.deploy_kind.get() {237			return Ok(*kind);238		}239		let is_fleet_managed = match self.file_exists("/etc/FLEET_HOST").await {240			Ok(v) => v,241			Err(e) => {242				bail!("failed to query remote system kind: {e}");243			}244		};245		if !is_fleet_managed {246			bail!(247				"{}",248				indoc::indoc! {"249				host is not marked as managed by fleet250				if you're not trying to lustrate/install system from scratch,251				you should either252					1. manually create /etc/FLEET_HOST file on the target host,253					2. use ?deploy_kind=fleet host argument if you're upgrading from older version of fleet254					3. use ?deploy_kind=upgrade_to_fleet if you're upgrading from plain nixos to fleet-managed nixos255			"}256			);257		}258		// TOCTOU is possible259		let _ = self.deploy_kind.set(DeployKind::Fleet);260		Ok(*self.deploy_kind.get().expect("deploy kind is just set"))261	}262	pub async fn escalation_strategy(&self) -> Result<EscalationStrategy> {263		// Prefer sudo, as run0 has some gotchas with polkit264		// and too many repeating prompts.265		if (self.find_in_path("sudo").await).is_ok() {266			return Ok(EscalationStrategy::Sudo);267		}268		if (self.find_in_path("run0").await).is_ok() {269			return Ok(EscalationStrategy::Run0);270		}271		Ok(EscalationStrategy::Su)272	}273	async fn open_session(&self) -> Result<Arc<openssh::Session>> {274		assert!(!self.local, "do not open ssh connection to local session");275		// FIXME: TOCTOU276		if let Some(session) = &self.session.get() {277			return Ok((*session).clone());278		};279		let mut session = SessionBuilder::default();280		session.control_persist(ControlPersist::ClosedAfterInitialConnection);281282		let dest = self.session_destination.get().unwrap_or(&self.name);283		let session = session284			.connect(&dest)285			.await286			.map_err(|e| anyhow!("ssh error while connecting to {}: {e:#?}", self.name))?;287		let session = Arc::new(session);288		self.session.set(session.clone()).expect("TOCTOU happened");289		Ok(session)290	}291	pub async fn mktemp_dir(&self) -> Result<String> {292		let mut cmd = self.cmd("mktemp").await?;293		cmd.arg("-d");294		let path = cmd.run_string().await?;295		Ok(path.trim_end().to_owned())296	}297	pub async fn file_exists(&self, path: impl AsRef<OsStr>) -> Result<bool> {298		let mut cmd = self.cmd("sh").await?;299		cmd.arg("-c")300			.arg("test -e \"$1\" && echo true || echo false")301			.arg("_")302			.arg(path);303		cmd.run_value().await304	}305	pub async fn read_file_bin(&self, path: impl AsRef<OsStr>) -> Result<Vec<u8>> {306		let mut cmd = self.cmd("cat").await?;307		cmd.arg(path);308		cmd.run_bytes().await309	}310	pub async fn read_file_text(&self, path: impl AsRef<OsStr>) -> Result<String> {311		let mut cmd = self.cmd("cat").await?;312		cmd.arg(path);313		cmd.run_string().await314	}315	pub async fn read_dir(&self, path: impl AsRef<OsStr>) -> Result<Vec<String>> {316		let mut cmd = self.cmd("ls").await?;317		cmd.arg(path);318		let out = cmd.run_string().await?;319		let mut lines = out.split('\n');320		if let Some(last) = lines.next_back() {321			ensure!(last.is_empty(), "output of ls should end with newline");322		}323		Ok(lines.map(ToOwned::to_owned).collect())324	}325	#[allow(dead_code)]326	pub async fn read_file_json<D: DeserializeOwned>(&self, path: impl AsRef<OsStr>) -> Result<D> {327		let text = self.read_file_text(path).await?;328		Ok(serde_json::from_str(&text)?)329	}330	pub async fn read_env(&self, env: &str) -> Result<String> {331		let mut cmd = self.cmd("printenv").await?;332		cmd.arg(env);333		cmd.run_string().await334	}335	pub async fn find_in_path(&self, command: &str) -> Result<String> {336		// // `which` is not a part of coreutils, and it might not exist on machine.337		// let path = self.read_env("PATH").await?;338		// // Assuming delimiter is :, we don't work with windows host, this check will be much339		// // more sophisticated in remowt backend (and quicker, since actual PATH search will be done on remote machine)340		// for ele in path.split(':') {341		// 	let test_path = format!("{ele}/{cmd}");342		// 	test -x etc343		// }344		// let mut cmd = self.cmd("printenv").await?;345		// cmd.arg(env);346		// Ok(cmd.run_string().await?)347		// Assuming this is an environment issue if which doesn't exist, will be fixed with remowt.348		let mut cmd = self349			.cmd_escalation(350				// Not used351				EscalationStrategy::Su,352				"which",353			)354			.await?;355		cmd.arg(command);356		cmd.run_string().await357	}358	pub async fn read_file_value<D: FromStr>(&self, path: impl AsRef<OsStr>) -> Result<D>359	where360		<D as FromStr>::Err: Display,361	{362		let text = self.read_file_text(path).await?;363		D::from_str(&text).map_err(|e| anyhow!("failed to parse value: {e}"))364	}365	pub async fn cmd(&self, cmd: impl AsRef<OsStr>) -> Result<MyCommand> {366		self.cmd_escalation(self.escalation_strategy().await?, cmd)367			.await368	}369	pub async fn cmd_escalation(370		&self,371		escalation: EscalationStrategy,372		cmd: impl AsRef<OsStr>,373	) -> Result<MyCommand> {374		if self.local {375			Ok(MyCommand::new(escalation, cmd))376		} else {377			let session = self.open_session().await?;378			Ok(MyCommand::new_on(escalation, cmd, session))379		}380	}381	pub async fn nix_cmd(&self) -> Result<MyCommand> {382		let mut nix = self.cmd("nix").await?;383		nix.args([384			"--extra-experimental-features",385			"nix-command",386			"--extra-experimental-features",387			"flakes",388		]);389		Ok(nix)390	}391392	pub async fn decrypt(&self, data: SecretData) -> Result<Vec<u8>> {393		ensure!(data.encrypted, "secret is not encrypted");394		let mut cmd = self.cmd("fleet-install-secrets").await?;395		cmd.arg("decrypt").eqarg("--secret", data.to_string());396		let encoded = cmd397			.sudo()398			.run_string()399			.await400			.context("failed to call remote host for decrypt")?;401		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;402		ensure!(!data.encrypted, "secret came out encrypted");403		Ok(data.data)404	}405	pub async fn reencrypt_distribution(406		&self,407		data: &FleetSecretDistribution,408		targets: BTreeSet<SecretOwner>,409		now: DateTime<Utc>,410	) -> Result<FleetSecretDistribution> {411		let mut parts = BTreeMap::new();412		for (part_name, part) in &data.secret.parts {413			parts.insert(414				part_name.clone(),415				if part.raw.encrypted {416					FleetSecretPart {417						raw: self.reencrypt(part.raw.clone(), targets.clone()).await?,418					}419				} else {420					part.clone()421				},422			);423		}424		let secret = FleetSecretData {425			created_at: data.secret.created_at,426			expires_at: data.secret.expires_at,427			generation_data: data.secret.generation_data.clone(),428			parts,429		};430		Ok(FleetSecretDistribution::new(targets, secret, now))431	}432	pub async fn reencrypt(433		&self,434		data: SecretData,435		targets: BTreeSet<SecretOwner>,436	) -> Result<SecretData> {437		ensure!(data.encrypted, "secret is not encrypted");438		let mut cmd = self.cmd("fleet-install-secrets").await?;439		cmd.arg("reencrypt").eqarg("--secret", data.to_string());440		for target in targets {441			let key = self.config.key(&target).await?;442			cmd.eqarg("--targets", key);443		}444		let encoded = cmd445			.sudo()446			.run_string()447			.await448			.context("failed to call remote host for decrypt")?;449		let data: SecretData = encoded.parse().map_err(|e| anyhow!("{e}"))?;450		ensure!(data.encrypted, "secret came out not encrypted");451		Ok(data)452	}453	/// Returns path for futureproofing, as path might change i.e on conversion to CA454	pub async fn remote_derivation(&self, path: &PathBuf) -> Result<PathBuf> {455		if self.local {456			// Path is located locally, thus already trusted.457			return Ok(path.to_owned());458		}459		let mut nix = MyCommand::new(460			// Not used461			EscalationStrategy::Su,462			"nix",463		);464		nix.arg("copy").arg("--substitute-on-destination");465466		let proto = if self.legacy_ssh_store.get().cloned().unwrap_or(false) {467			"ssh"468		} else {469			"ssh-ng"470		};471472		match self.deploy_kind().await? {473			DeployKind::Fleet | DeployKind::UpgradeToFleet | DeployKind::NixosLustrate => {474				nix.comparg("--to", format!("{proto}://{}", self.name));475			}476			DeployKind::NixosInstall => {477				nix478					// Signature checking makes no sense with remote-store store argument set, as we're not even interacting with remote nix daemon479					.arg("--no-check-sigs")480					.comparg(481						"--to",482						format!("{proto}://root@{}?remote-store=/mnt", self.name),483					);484			}485		}486		nix.arg(path);487		nix.run_nix().await.context("nix copy")?;488		Ok(path.to_owned())489	}490	pub async fn systemctl_stop(&self, name: &str) -> Result<()> {491		let mut cmd = self.cmd("systemctl").await?;492		cmd.arg("stop").arg(name);493		cmd.sudo().run().await494	}495	pub async fn systemctl_start(&self, name: &str) -> Result<()> {496		let mut cmd = self.cmd("systemctl").await?;497		cmd.arg("start").arg(name);498		cmd.sudo().run().await499	}500501	pub async fn rm_file(&self, path: impl AsRef<OsStr>, sudo: bool) -> Result<()> {502		let mut cmd = self.cmd("rm").await?;503		cmd.arg("-f").arg(path);504		if sudo {505			cmd = cmd.sudo()506		}507		cmd.run().await508	}509}510511struct HostSecretDefinition(Value);512513impl ConfigHost {514	// TOCTOU is possible here in case if config is changed, but this case is not handled anywhere anyway,515	// assuming getting tags always returns the same value.516	pub fn tags(&self) -> Result<Vec<String>> {517		if let Some(v) = self.groups.get() {518			return Ok(v.clone());519		}520		let Some(host_config) = &self.host_config else {521			return Ok(vec![]);522		};523		let tags: Vec<String> = nix_go_json!(host_config.tags);524525		let _ = self.groups.set(tags.clone());526527		Ok(tags)528	}529	pub fn nixos_config(&self) -> Result<Value> {530		if let Some(v) = self.nixos_config.get() {531			return Ok(v.clone());532		}533		let Some(host_config) = &self.host_config else {534			bail!("local host has no nixos_config");535		};536		let nixos_config = nix_go!(host_config.nixos.config);537		assert_warn("nixos config evaluation", &nixos_config)?;538539		let _ = self.nixos_config.set(nixos_config.clone());540541		Ok(nixos_config)542	}543	pub fn nixos_unchecked_config(&self) -> Result<Value> {544		if let Some(v) = self.nixos_unchecked_config.get() {545			return Ok(v.clone());546		}547		let Some(host_config) = &self.host_config else {548			bail!("local host has no nixos_config");549		};550		let nixos_config = nix_go!(host_config.nixos_unchecked.config);551552		let _ = self.nixos_unchecked_config.set(nixos_config.clone());553554		Ok(nixos_config)555	}556557	pub fn list_defined_secrets(&self) -> Result<Vec<String>> {558		let nixos = self.nixos_unchecked_config()?;559		let secrets = nix_go!(nixos.secrets);560		secrets.list_fields()561	}562563	/// Packages for this host, resolved with nixpkgs overlays564	pub fn pkgs(&self) -> Result<Value> {565		if let Some(value) = &self.pkgs_override {566			return Ok(value.clone());567		}568		let Some(host_config) = &self.host_config else {569			bail!("local host has no host_config");570		};571		// TODO: Should nixos.options be cached?572		Ok(nix_go!(host_config.nixos.options._module.args.value.pkgs))573	}574}575576#[derive(Clone)]577pub struct SharedSecretDefinition(Value);578impl SharedSecretDefinition {579	pub fn expected_owners(&self) -> Result<BTreeSet<SecretOwner>> {580		let secret = &self.0;581		Ok(nix_go_json!(secret.expectedOwners))582	}583	pub fn allow_different(&self) -> Result<bool> {584		let secret = &self.0;585		Ok(nix_go_json!(secret.allowDifferent))586	}587	pub fn regenerate_on_owner_added(&self) -> Result<bool> {588		let secret = &self.0;589		Ok(nix_go_json!(secret.regenerateOnOwnerAdded))590	}591	pub fn regenerate_on_owner_removed(&self) -> Result<bool> {592		let secret = &self.0;593		Ok(nix_go_json!(secret.regenerateOnOwnerRemoved))594	}595	pub fn generator(&self) -> Result<Value> {596		let secret = &self.0;597		Ok(nix_go!(secret.generator))598	}599}600601impl Config {602	pub fn tagged_hostnames(&self, tag: &str) -> Result<Vec<String>> {603		let config = &self.config_field;604		let tagged: Vec<String> = nix_go_json!(config.taggedWith[{ tag }]);605		Ok(tagged)606	}607	pub fn expand_owner_set(&self, owners: Vec<String>) -> Result<BTreeSet<String>> {608		let mut out = BTreeSet::new();609		for owner in owners {610			if let Some(tag) = owner.strip_prefix('@') {611				let hosts = self.tagged_hostnames(tag)?;612				out.extend(hosts);613			} else {614				out.insert(owner);615			}616		}617		Ok(out)618	}619	pub fn local_host(&self) -> ConfigHost {620		ConfigHost {621			config: self.clone(),622			name: "<virtual localhost>".to_owned(),623			host_config: None,624			nixos_config: OnceLock::new(),625			nixos_unchecked_config: OnceLock::new(),626			groups: {627				let cell = OnceLock::new();628				let _ = cell.set(vec![]);629				cell630			},631			pkgs_override: Some(self.default_pkgs.clone()),632633			local: true,634			session: OnceLock::new(),635			deploy_kind: OnceLock::new(),636			session_destination: OnceLock::new(),637			legacy_ssh_store: OnceLock::new(),638		}639	}640641	pub fn host(&self, name: &str) -> Result<ConfigHost> {642		let config = &self.config_field;643		let host_config = nix_go!(config.hosts[{ name }]);644645		Ok(ConfigHost {646			config: self.clone(),647			name: name.to_owned(),648			host_config: Some(host_config),649			nixos_config: OnceLock::new(),650			nixos_unchecked_config: OnceLock::new(),651			groups: OnceLock::new(),652			pkgs_override: None,653654			// TODO: Remove with connectivit refactor655			local: self.localhost == name,656			session: OnceLock::new(),657			deploy_kind: OnceLock::new(),658			session_destination: OnceLock::new(),659			legacy_ssh_store: OnceLock::new(),660		})661	}662	pub fn list_hosts(&self) -> Result<Vec<ConfigHost>> {663		let config = &self.config_field;664		let names = nix_go!(config.hosts).list_fields()?;665		let mut out = vec![];666		for name in names {667			out.push(self.host(&name)?);668		}669		Ok(out)670	}671	// TODO: Replace usages with .host().nixos_config672	pub fn system_config(&self, host: &str) -> Result<Value> {673		let fleet_field = &self.config_field;674		Ok(nix_go!(fleet_field.hosts[{ host }].nixos.config))675	}676677	pub fn secret_definition(&self, secret: &str) -> Result<Option<SharedSecretDefinition>> {678		let config = &self.config_field;679		let shared_secrets = nix_go!(config.secrets);680		if !shared_secrets.has_field(secret)? {681			return Ok(None);682		}683		Ok(Some(SharedSecretDefinition(nix_go!(684			shared_secrets[secret]685		))))686	}687688	pub fn save(&self) -> Result<()> {689		let mut tempfile = NamedTempFile::new_in(self.directory.clone()).context("failed to create updated version of fleet.nix in the same directory as original.\nDo you have write access to it? Access only to the fleet.nix won't be enough, the directory is used for atomic overwrite operation.\nIt is not recommended to use fleet by root anyway, move fleet project to your home directory.")?;690		let data = nixlike::serialize(&*self.data)?;691		tempfile.write_all(692			format!(693				"# This file contains fleet state and shouldn't be edited by hand\n\n{data}\n\n# vim: ts=2 et nowrap\n"694			)695			.as_bytes(),696		)?;697		let mut fleet_data_path = self.directory.clone();698		fleet_data_path.push("fleet.nix");699		tempfile.persist(fleet_data_path)?;700		Ok(())701	}702}