git.delta.rocks / jrsonnet / refs/commits / 53dc93b160c1

difftreelog

feat sign built systems by default

Yaroslav Bolyukin · 2023-10-22 · parent: #7a60d07 · .patch · .diff
in: trunk

6 files changed

modified · cmds/fleet/src/cmds/build_systems.rs · diff | before | after | both
before · cmds/fleet/src/cmds/build_systems.rs
1use std::path::PathBuf;2use std::{env::current_dir, time::Duration};34use crate::command::MyCommand;5use crate::host::Config;6use anyhow::{anyhow, Result};7use clap::Parser;8use itertools::Itertools;9use tokio::{task::LocalSet, time::sleep};10use tracing::{error, field, info, info_span, warn, Instrument};1112#[derive(Parser, Clone)]13pub struct BuildSystems {14	/// Do not continue on error15	#[clap(long)]16	fail_fast: bool,17	/// Disable automatic rollback18	#[clap(long)]19	disable_rollback: bool,20	/// Run builds as sudo21	#[clap(long)]22	privileged_build: bool,23	#[clap(subcommand)]24	subcommand: Subcommand,25}2627enum UploadAction {28	Test,29	Boot,30	Switch,31}32impl UploadAction {33	fn name(&self) -> &'static str {34		match self {35			UploadAction::Test => "test",36			UploadAction::Boot => "boot",37			UploadAction::Switch => "switch",38		}39	}4041	pub(crate) fn should_switch_profile(&self) -> bool {42		matches!(self, Self::Switch | Self::Boot)43	}44	pub(crate) fn should_activate(&self) -> bool {45		matches!(self, Self::Switch | Self::Test)46	}47	pub(crate) fn should_schedule_rollback_run(&self) -> bool {48		matches!(self, Self::Switch | Self::Test)49	}50}5152enum PackageAction {53	SdImage,54	InstallationCd,55}56impl PackageAction {57	fn build_attr(&self) -> String {58		match self {59			PackageAction::SdImage => "sdImage".to_owned(),60			PackageAction::InstallationCd => "installationCd".to_owned(),61		}62	}63}6465enum Action {66	Upload { action: Option<UploadAction> },67	Package(PackageAction),68}69impl Action {70	fn build_attr(&self) -> String {71		match self {72			Action::Upload { .. 
} => "toplevel".to_owned(),73			Action::Package(p) => p.build_attr(),74		}75	}76}7778impl From<Subcommand> for Action {79	fn from(s: Subcommand) -> Self {80		match s {81			Subcommand::Upload => Self::Upload { action: None },82			Subcommand::Test => Self::Upload {83				action: Some(UploadAction::Test),84			},85			Subcommand::Boot => Self::Upload {86				action: Some(UploadAction::Boot),87			},88			Subcommand::Switch => Self::Upload {89				action: Some(UploadAction::Switch),90			},91			Subcommand::SdImage => Self::Package(PackageAction::SdImage),92			Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd),93		}94	}95}9697#[derive(Parser, Clone)]98enum Subcommand {99	/// Upload, but do not switch100	Upload,101	/// Upload + switch to built system until reboot102	Test,103	/// Upload + switch to built system after reboot104	Boot,105	/// Upload + test + boot106	Switch,107108	/// Build SD .img image109	SdImage,110	/// Build an installation cd ISO image111	InstallationCd,112}113114struct Generation {115	id: u32,116	current: bool,117	datetime: String,118}119async fn get_current_generation(config: &Config, host: &str) -> Result<Generation> {120	let mut cmd = MyCommand::new("nix-env");121	cmd.comparg("--profile", "/nix/var/nix/profiles/system")122		.arg("--list-generations");123	// Sudo is required due to --list-generations acquiring lock on the profile.124	let data = config.run_string_on(&host, cmd, true).await?;125	let generations = data126		.split('\n')127		.map(|e| e.trim())128		.filter(|&l| l != "")129		.filter_map(|g| {130			let gen: Option<Generation> = try {131				let mut parts = g.split_whitespace();132				let id = parts.next()?;133				let id: u32 = id.parse().ok()?;134				let date = parts.next()?;135				let time = parts.next()?;136				let current = if let Some(current) = parts.next() {137					if current == "(current)" {138						Some(true)139					} else {140						None141					}142				} else {143					Some(false)144				};145				let current = 
current?;146				if parts.next().is_some() {147					warn!("unexpected text after generation: {g}");148				}149				Generation {150					id,151					current,152					datetime: format!("{date} {time}"),153				}154			};155			if gen.is_none() {156				warn!("bad generation: {g}")157			}158			gen159		})160		.collect::<Vec<_>>();161	let current = generations162		.into_iter()163		.filter(|g| g.current)164		.at_most_one()165		.map_err(|_e| anyhow!("bad list-generations output"))?166		.ok_or_else(|| anyhow!("failed to find generation"))?;167	Ok(current)168}169170async fn systemctl_stop(config: &Config, host: &str, unit: &str) -> Result<()> {171	let mut cmd = MyCommand::new("systemctl");172	cmd.arg("stop").arg(unit);173	config.run_on(&host, cmd, true).await174}175176async fn systemctl_start(config: &Config, host: &str, unit: &str) -> Result<()> {177	let mut cmd = MyCommand::new("systemctl");178	cmd.arg("start").arg(unit);179	config.run_on(&host, cmd, true).await180}181182async fn execute_upload(183	build: &BuildSystems,184	config: &Config,185	action: UploadAction,186	host: &str,187	built: PathBuf,188) -> Result<()> {189	let mut failed = false;190	// TODO: Lockfile, to prevent concurrent system switch?191	// TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback192	// is scheduler on next boot (default behavior). 
On current boot - rollback activator will fail due to193	// unit name conflict in systemd-run194	if !build.disable_rollback {195		let _span = info_span!("preparing").entered();196		info!("preparing for rollback");197		let generation = get_current_generation(&config, &host).await?;198		info!(199			"rollback target would be {} {}",200			generation.id, generation.datetime201		);202		{203			let mut cmd = MyCommand::new("sh");204			cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));205			if let Err(e) = config.run_on(&host, cmd, true).await {206				error!("failed to set rollback marker: {e}");207				failed = true;208			}209		}210		// Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.211		// Kicking it on manually will work best.212		//213		// There wouldn't be conflict, because here we trigger start of the primary service, and systemd will214		// only allow one instance of it.215216		// TODO: We should also watch how this process is going.217		// After running this command, we have less than 3 minutes to deploy everything,218		// if we fail to perform generation switch in time, then we will still call the activation script, and this may break something.219		// Anyway, reboot will still help in this case.220		if action.should_schedule_rollback_run() {221			let mut cmd = MyCommand::new("systemd-run");222			cmd.comparg("--on-active", "3min")223				.comparg("--unit", "rollback-watchdog-run")224				.arg("systemctl")225				.arg("start")226				.arg("rollback-watchdog.service");227			if let Err(e) = config.run_on(&host, cmd, true).await {228				error!("failed to schedule rollback run: {e}");229				failed = true;230			}231		}232	}233	if action.should_switch_profile() && !failed {234		info!("switching generation");235		let mut cmd = MyCommand::new("nix-env");236		cmd.comparg("--profile", 
"/nix/var/nix/profiles/system")237			.comparg("--set", &built);238		if let Err(e) = config.run_on(&host, cmd, true).await {239			error!("failed to switch generation: {e}");240			failed = true;241		}242	}243	if action.should_activate() && !failed {244		let _span = info_span!("activating").entered();245		info!("executing activation script");246		let mut switch_script = built.clone();247		switch_script.push("bin");248		switch_script.push("switch-to-configuration");249		let mut cmd = MyCommand::new(switch_script);250		cmd.arg(action.name());251		if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {252			error!("failed to activate: {e}");253			failed = true;254		}255	}256	if !build.disable_rollback {257		if failed {258			info!("executing rollback");259			if let Err(e) = systemctl_start(&config, &host, "rollback-watchdog.service")260				.instrument(info_span!("rollback"))261				.await262			{263				error!("failed to trigger rollback: {e}")264			}265		} else {266			info!("trying to mark upgrade as successful");267			let mut cmd = MyCommand::new("rm");268			cmd.arg("-f").arg("/etc/fleet_rollback_marker");269			if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {270				error!("failed to remove rollback marker. 
This is bad, as the system will be rolled back by watchdog: {e}")271			}272		}273		info!("disarming watchdog, just in case");274		if let Err(_e) = systemctl_stop(&config, &host, "rollback-watchdog.timer").await {275			// It is ok, if there was no reboot - then timer might not be running.276		}277		if action.should_schedule_rollback_run() {278			if let Err(e) = systemctl_stop(&config, &host, "rollback-watchdog-run.timer").await {279				error!("failed to disarm rollback run: {e}");280			}281		}282	} else {283		let mut cmd = MyCommand::new("rm");284		cmd.arg("-f").arg("/etc/fleet_rollback_marker");285		if let Err(_e) = config.run_on(&host, cmd, true).in_current_span().await {286			// Marker might not exist, yet better try to remove it.287		}288	}289	Ok(())290}291292impl BuildSystems {293	async fn build_task(self, config: Config, host: String) -> Result<()> {294		info!("building");295		let action = Action::from(self.subcommand.clone());296		let built = {297			let dir = tempfile::tempdir()?;298			dir.path().to_owned()299		};300301		let mut nix_build = MyCommand::new("nix");302		nix_build303			.args([304				"build",305				"--impure",306				"--json",307				// "--show-trace",308				"--no-link",309			])310			.comparg("--out-link", &built)311			.arg(312				config.configuration_attr_name(&format!(313					"buildSystems.{}.{host}",314					action.build_attr()315				)),316			)317			.args(&config.nix_args);318319		if self.privileged_build {320			nix_build = nix_build.sudo();321		}322323		nix_build.run_nix().await.map_err(|e| {324			if action.build_attr() == "sdImage" {325				info!("sd-image build failed");326				info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");327				info!("This module was automatically imported before, but was removed for better customization")328			}329			e330		})?;331		let built = std::fs::canonicalize(built)?;332333		match action {334			Action::Upload { action } => 
{335				if !config.is_local(&host) {336					info!("uploading system closure");337					let mut tries = 0;338					loop {339						let mut nix = MyCommand::new("nix");340						nix.arg("copy")341							.arg("--substitute-on-destination")342							.comparg("--to", format!("ssh-ng://root@{host}"))343							.arg(&built);344						match nix.run_nix().await {345							Ok(()) => break,346							Err(e) if tries < 3 => {347								tries += 1;348								warn!("Copy failure ({}/3): {}", tries, e);349								sleep(Duration::from_millis(5000)).await;350							}351							Err(e) => return Err(e),352						}353					}354				}355				if let Some(action) = action {356					execute_upload(&self, &config, action, &host, built).await?357				}358			}359			Action::Package(PackageAction::SdImage) => {360				let mut out = current_dir()?;361				out.push(format!("sd-image-{}", host));362363				info!("building sd image to {:?}", out);364				let mut nix_build = MyCommand::new("nix");365				nix_build366					.args(["build", "--impure", "--no-link"])367					.comparg("--out-link", &out)368					.arg(config.configuration_attr_name(&format!("buildSystems.sdImage.{}", host,)))369					.args(&config.nix_args);370				if !self.fail_fast {371					nix_build.arg("--keep-going");372				}373				if self.privileged_build {374					nix_build = nix_build.sudo();375				}376377				nix_build.run_nix().await?;378			}379			Action::Package(PackageAction::InstallationCd) => {380				let mut out = current_dir()?;381				out.push(format!("installation-cd-{}", host));382383				info!("building sd image to {:?}", out);384				let mut nix_build = MyCommand::new("nix");385				nix_build386					.args(["build", "--impure", "--no-link"])387					.comparg("--out-link", &out)388					.arg(389						config.configuration_attr_name(&format!(390							"buildSystems.installationCd.{}",391							host,392						)),393					)394					.args(&config.nix_args);395				if !self.fail_fast {396					nix_build.arg("--keep-going");397				}398				if self.privileged_build 
{399					nix_build = nix_build.sudo();400				}401402				nix_build.run_nix().await?;403			}404		};405		Ok(())406	}407408	pub async fn run(self, config: &Config) -> Result<()> {409		let hosts = config.list_hosts().await?;410		let set = LocalSet::new();411		let this = &self;412		for host in hosts.iter() {413			if config.should_skip(host) {414				continue;415			}416			let config = config.clone();417			let host = host.clone();418			let this = this.clone();419			let span = info_span!("deployment", host = field::display(&host));420			set.spawn_local(421				(async move {422					match this.build_task(config, host).await {423						Ok(_) => {}424						Err(e) => {425							error!("failed to deploy host: {}", e)426						}427					}428				})429				.instrument(span),430			);431		}432		set.await;433		Ok(())434	}435}
modified · cmds/fleet/src/command.rs · diff | before | after | both
--- a/cmds/fleet/src/command.rs
+++ b/cmds/fleet/src/command.rs
@@ -269,7 +269,7 @@
 								drv = pkg;
 							}
 						}
-						info!(target: "nix","copying {} {} -> {}", drv, from, to);
+						// info!(target: "nix","copying {} {} -> {}", drv, from, to);
 						let span = info_span!("copy", from, to, drv);
 						span.pb_start();
 						self.spans.insert(id, span);
modified · nixos/modules/module-list.nix · diff | before | after | both
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -3,4 +3,5 @@
   ../meta.nix
   ../secrets.nix
   ../rollback.nix
+  ../nix-sign.nix
 ]
added · nixos/nix-sign.nix · diff | before | after | both
--- /dev/null
+++ b/nixos/nix-sign.nix
@@ -0,0 +1,14 @@
+# Required for nix copy in build_systems.rs
+{config, ...}: {
+  # https://github.com/NixOS/nix/issues/3023
+  systemd.services.generate-nix-cache-key = {
+    wantedBy = ["multi-user.target"];
+    serviceConfig.Type = "oneshot";
+    path = [config.nix.package];
+    script = ''
+      [[ -f /etc/nix/private-key ]] && exit
+      nix-store --generate-binary-cache-key ${config.networking.hostName}-1 /etc/nix/private-key /etc/nix/public-key
+    '';
+  };
+  nix.settings.secret-key-files = "/etc/nix/private-key";
+}
modified · nixos/rollback.nix · diff | before | after | both
--- a/nixos/rollback.nix
+++ b/nixos/rollback.nix
@@ -1,3 +1,4 @@
+# Tied to build_systems.rs
 {config, ...}: {
   # TODO: Make it work with systemd-initrd approach.
   # In this case we can't just switch generation and re-run activation script, since the root filesystem might not be
added · scripts/install-trusted-cert.sh · diff | before | after | both
--- /dev/null
+++ b/scripts/install-trusted-cert.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+set -eu
+
+pubkey="$(sudo cat /etc/nix/private-key | nix key convert-secret-to-public)"
+echo pubkey = "$pubkey"
+
+edited_conf=$(mktemp)
+
+remote_conf=$(ssh "$1" cat /etc/nix/nix.conf)
+echo remote_conf = \"\"\"
+echo "$remote_conf"
+echo \"\"\"
+echo "$remote_conf" > "$edited_conf"
+sed -i 's/\.  Do not edit it!/\. Then it was altered by install-trusted-cert. Do not edit!/g' "$edited_conf"
+sed -i "s|^trusted-public-keys =.*|& $pubkey|g" "$edited_conf"
+
+echo edited_conf = \"\"\"
+cat "$edited_conf"
+echo \"\"\"
+
+# Make nix.conf editable
+ssh "$1" sudo mv /etc/nix/nix.conf /etc/nix/nix.conf.bk
+ssh "$1" sudo cp /etc/nix/nix.conf.bk /etc/nix/nix.conf
+ssh "$1" "cat | sudo dd of=/etc/nix/nix.conf" < "$edited_conf"
+ssh "$1" sudo systemctl restart nix-daemon