difftreelog
feat automatic rollback
in: trunk
10 files changed
Cargo.lockdiffbeforeafterboth--- a/Cargo.lock
+++ b/Cargo.lock
@@ -610,6 +610,12 @@
]
[[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -684,6 +690,7 @@
"futures",
"hostname",
"indicatif",
+ "itertools",
"nixlike",
"once_cell",
"peg",
@@ -1127,6 +1134,15 @@
]
[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
+[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
Cargo.tomldiffbeforeafterboth--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,2 +1,3 @@
[workspace]
members = ["crates/*", "cmds/*"]
+resolver = "2"
cmds/fleet/Cargo.tomldiffbeforeafterboth--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -34,3 +34,4 @@
futures = "0.3.17"
tracing-indicatif = "0.3.5"
indicatif = "0.17.7"
+itertools = "0.11.0"
cmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth1use std::{env::current_dir, time::Duration};23use crate::command::MyCommand;4use crate::host::Config;5use anyhow::Result;6use clap::Parser;7use tokio::{task::LocalSet, time::sleep};8use tracing::{error, field, info, info_span, warn, Instrument};910#[derive(Parser, Clone)]11pub struct BuildSystems {12 /// Do not continue on error13 #[clap(long)]14 fail_fast: bool,15 /// Run builds as sudo16 #[clap(long)]17 privileged_build: bool,18 #[clap(subcommand)]19 subcommand: Subcommand,20}2122enum UploadAction {23 Test,24 Boot,25 Switch,26}27impl UploadAction {28 fn name(&self) -> &'static str {29 match self {30 UploadAction::Test => "test",31 UploadAction::Boot => "boot",32 UploadAction::Switch => "switch",33 }34 }3536 pub(crate) fn should_switch_profile(&self) -> bool {37 matches!(self, Self::Switch | Self::Boot)38 }39 pub(crate) fn should_activate(&self) -> bool {40 matches!(self, Self::Switch | Self::Test)41 }42}4344enum PackageAction {45 SdImage,46 InstallationCd,47}48impl PackageAction {49 fn build_attr(&self) -> String {50 match self {51 PackageAction::SdImage => "sdImage".to_owned(),52 PackageAction::InstallationCd => "installationCd".to_owned(),53 }54 }55}5657enum Action {58 Upload { action: Option<UploadAction> },59 Package(PackageAction),60}61impl Action {62 fn build_attr(&self) -> String {63 match self {64 Action::Upload { .. 
} => "toplevel".to_owned(),65 Action::Package(p) => p.build_attr(),66 }67 }68}6970impl From<Subcommand> for Action {71 fn from(s: Subcommand) -> Self {72 match s {73 Subcommand::Upload => Self::Upload { action: None },74 Subcommand::Test => Self::Upload {75 action: Some(UploadAction::Test),76 },77 Subcommand::Boot => Self::Upload {78 action: Some(UploadAction::Boot),79 },80 Subcommand::Switch => Self::Upload {81 action: Some(UploadAction::Switch),82 },83 Subcommand::SdImage => Self::Package(PackageAction::SdImage),84 Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd),85 }86 }87}8889#[derive(Parser, Clone)]90enum Subcommand {91 /// Upload, but do not switch92 Upload,93 /// Upload + switch to built system until reboot94 Test,95 /// Upload + switch to built system after reboot96 Boot,97 /// Upload + test + boot98 Switch,99100 /// Build SD .img image101 SdImage,102 /// Build an installation cd ISO image103 InstallationCd,104}105106impl BuildSystems {107 async fn build_task(self, config: Config, host: String) -> Result<()> {108 info!("building");109 let action = Action::from(self.subcommand.clone());110 let built = {111 let dir = tempfile::tempdir()?;112 dir.path().to_owned()113 };114115 let mut nix_build = MyCommand::new("nix");116 nix_build117 .args([118 "build",119 "--impure",120 "--json",121 // "--show-trace",122 "--no-link",123 "--option",124 "log-lines",125 "200",126 ])127 .comparg("--out-link", &built)128 .arg(129 config.configuration_attr_name(&format!(130 "buildSystems.{}.{host}",131 action.build_attr()132 )),133 )134 .args(&config.nix_args);135136 if self.privileged_build {137 nix_build = nix_build.sudo();138 }139140 nix_build.run_nix().await.map_err(|e| {141 if action.build_attr() == "sdImage" {142 info!("sd-image build failed");143 info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");144 info!("This module was automatically imported before, but 
was removed for better customization")145 }146 e147 })?;148 let built = std::fs::canonicalize(built)?;149150 match action {151 Action::Upload { action } => {152 if !config.is_local(&host) {153 info!("uploading system closure");154 let mut tries = 0;155 loop {156 let mut nix = MyCommand::new("nix");157 nix.arg("copy")158 .comparg("--to", format!("ssh://root@{host}"))159 .arg(&built);160 match nix.run_nix().await {161 Ok(()) => break,162 Err(e) if tries < 3 => {163 tries += 1;164 warn!("Copy failure ({}/3): {}", tries, e);165 sleep(Duration::from_millis(5000)).await;166 }167 Err(e) => return Err(e),168 }169 }170 }171 if let Some(action) = action {172 if action.should_switch_profile() {173 info!("switching generation");174 let mut cmd = MyCommand::new("nix-env");175 cmd.comparg("--profile", "/nix/var/nix/profiles/system")176 .comparg("--set", &built);177 config.run_on(&host, cmd, true).await?;178 }179 if action.should_activate() {180 info!("executing activation script");181 let mut switch_script = built.clone();182 switch_script.push("bin");183 switch_script.push("switch-to-configuration");184 let mut cmd = MyCommand::new(switch_script);185 cmd.arg(action.name());186 config.run_on(&host, cmd, true).await?;187 }188 }189 }190 Action::Package(PackageAction::SdImage) => {191 let mut out = current_dir()?;192 out.push(format!("sd-image-{}", host));193194 info!("building sd image to {:?}", out);195 let mut nix_build = MyCommand::new("nix");196 nix_build197 .args(["build", "--impure", "--no-link"])198 .comparg("--out-link", &out)199 .arg(config.configuration_attr_name(&format!("buildSystems.sdImage.{}", host,)))200 .args(&config.nix_args);201 if !self.fail_fast {202 nix_build.arg("--keep-going");203 }204 if self.privileged_build {205 nix_build = nix_build.sudo();206 }207208 nix_build.run_nix().await?;209 }210 Action::Package(PackageAction::InstallationCd) => {211 let mut out = current_dir()?;212 out.push(format!("installation-cd-{}", host));213214 info!("building sd image to 
{:?}", out);215 let mut nix_build = MyCommand::new("nix");216 nix_build217 .args(["build", "--impure", "--no-link"])218 .comparg("--out-link", &out)219 .arg(220 config.configuration_attr_name(&format!(221 "buildSystems.installationCd.{}",222 host,223 )),224 )225 .args(&config.nix_args);226 if !self.fail_fast {227 nix_build.arg("--keep-going");228 }229 if self.privileged_build {230 nix_build = nix_build.sudo();231 }232233 nix_build.run_nix().await?;234 }235 };236 Ok(())237 }238239 pub async fn run(self, config: &Config) -> Result<()> {240 let hosts = config.list_hosts().await?;241 let set = LocalSet::new();242 let this = &self;243 for host in hosts.iter() {244 if config.should_skip(host) {245 continue;246 }247 let config = config.clone();248 let host = host.clone();249 let this = this.clone();250 let span = info_span!("deployment", host = field::display(&host));251 set.spawn_local(252 (async move {253 match this.build_task(config, host).await {254 Ok(_) => {}255 Err(e) => {256 error!("failed to deploy host: {}", e)257 }258 }259 })260 .instrument(span),261 );262 }263 set.await;264 Ok(())265 }266}cmds/fleet/src/command.rsdiffbeforeafterboth--- a/cmds/fleet/src/command.rs
+++ b/cmds/fleet/src/command.rs
@@ -143,12 +143,14 @@
pub async fn run_nix_string(self) -> Result<String> {
let str = self.clone().into_string();
- let cmd = self.into_command();
+ let mut cmd = self.into_command();
+ cmd.arg("--log-format").arg("internal-json");
run_nix_inner_stdout(str, cmd, &mut NixHandler::default()).await
}
pub async fn run_nix(self) -> Result<()> {
let str = self.clone().into_string();
let mut cmd = self.into_command();
+ cmd.arg("--log-format").arg("internal-json");
cmd.stdout(Stdio::inherit());
run_nix_inner(str, cmd, &mut NixHandler::default()).await
}
@@ -410,7 +412,6 @@
handler: &mut dyn Handler,
) -> Result<Option<String>> {
info!("running {str}");
- cmd.arg("--log-format").arg("internal-json");
cmd.stderr(Stdio::piped());
cmd.stdout(Stdio::piped());
let mut child = cmd.spawn()?;
cmds/fleet/src/main.rsdiffbeforeafterboth--- a/cmds/fleet/src/main.rs
+++ b/cmds/fleet/src/main.rs
@@ -1,3 +1,5 @@
+#![feature(try_blocks)]
+
pub mod cmds;
pub mod command;
pub mod host;
@@ -6,16 +8,14 @@
mod fleetdata;
use std::ffi::OsString;
-use std::io;
use std::time::Duration;
-use anyhow::{anyhow, bail, Result};
+use anyhow::{bail, Result};
use clap::Parser;
use cmds::{build_systems::BuildSystems, info::Info, secrets::Secrets};
use host::{Config, FleetOpts};
use indicatif::{ProgressState, ProgressStyle};
-use tokio::fs;
use tokio::process::Command;
use tracing::{info, metadata::LevelFilter};
use tracing_indicatif::IndicatifLayer;
@@ -79,9 +79,6 @@
Opts::Prefetch(p) => p.run(config).await?,
};
Ok(())
-}
-fn elapsed_subsec(state: &ProgressState, writer: &mut dyn std::fmt::Write) {
- let _ = writer.write_str(&format!("{:?}", state.elapsed()));
}
#[tokio::main]
cmds/install-secrets/Cargo.tomldiffbeforeafterboth--- a/cmds/install-secrets/Cargo.toml
+++ b/cmds/install-secrets/Cargo.toml
@@ -9,7 +9,7 @@
env_logger = "0.10.0"
log = "0.4.14"
nix = "0.26.1"
-serde = "1.0.130"
+serde = { version = "1.0.130", features = ["derive"] }
serde_json = "1.0.89"
clap = { version = "4.0.29", features = [
"derive",
nixos/modules/module-list.nixdiffbeforeafterboth--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -2,4 +2,5 @@
../fleetPkgs.nix
../meta.nix
../secrets.nix
+ ../rollback.nix
]
nixos/rollback.nixdiffbeforeafterboth--- /dev/null
+++ b/nixos/rollback.nix
@@ -0,0 +1,45 @@
+{config, ...}: {
+ # TODO: Make it work with the systemd-initrd approach.
+ # In that case we can't just switch the generation and re-run the activation script, since the root filesystem might
+ # not be mounted yet. We need to explicitly remove the last generation, which needs deeper integration with
+ # systemd/grub/whatever the user uses. boot.json might also help here.
+
+ systemd.services.rollback-watchdog = {
+ description = "Rollback watchdog";
+ script = ''
+ set -eu
+ if [ -f /etc/fleet_rollback_marker ]; then
+ echo "found the rollback marker, switching to older generation"
+ target=$(cat /etc/fleet_rollback_marker)
+ echo "rolling back profile"
+ nix profile rollback --profile /nix/var/nix/profiles/system --to "$target"
+ echo "executing activation script"
+ "/nix/var/nix/profiles/system-$target-link/bin/switch-to-configuration" switch
+ echo "removing rollback marker"
+ rm -f /etc/fleet_rollback_marker
+ else
+ echo "rollback marker was removed, upgrade is succeeded"
+ fi
+ '';
+ path = [
+ # Should have nix-command support
+ config.nix.package
+ ];
+ serviceConfig.Type = "exec";
+ unitConfig = {
+ X-StopOnRemoval = false;
+ };
+ };
+
+ systemd.timers.rollback-watchdog = {
+ description = "Timer for rollback watchdog";
+ wantedBy = ["timers.target"];
+ timerConfig = {
+ OnUnitActiveSec = "3min";
+ RemainAfterElapse = false;
+ };
+ unitConfig = {
+ ConditionPathExists = "/etc/fleet_rollback_marker";
+ };
+ };
+}
pkgs/fleet-install-secrets.nixdiffbeforeafterboth--- a/pkgs/fleet-install-secrets.nix
+++ b/pkgs/fleet-install-secrets.nix
@@ -6,7 +6,7 @@
name = "${pname}-${version}";
src = ../.;
- cargoBuildFlags = "-p ${pname}";
+ buildAndTestSubdir = "cmds/install-secrets";
cargoLock = {
lockFile = ../Cargo.lock;
outputHashes = {