git.delta.rocks / jrsonnet / refs/commits / 741106e60111

difftreelog

feat automatic rollback

Yaroslav Bolyukin2023-10-15parent: #4340a04.patch.diff
in: trunk

10 files changed

modifiedCargo.lockdiffbeforeafterboth
609 "syn 2.0.37",609 "syn 2.0.37",
610]610]
611
612[[package]]
613name = "either"
614version = "1.9.0"
615source = "registry+https://github.com/rust-lang/crates.io-index"
616checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
611617
612[[package]]618[[package]]
613name = "encode_unicode"619name = "encode_unicode"
684 "futures",690 "futures",
685 "hostname",691 "hostname",
686 "indicatif",692 "indicatif",
693 "itertools",
687 "nixlike",694 "nixlike",
688 "once_cell",695 "once_cell",
689 "peg",696 "peg",
1126 "windows-sys 0.48.0",1133 "windows-sys 0.48.0",
1127]1134]
1135
1136[[package]]
1137name = "itertools"
1138version = "0.11.0"
1139source = "registry+https://github.com/rust-lang/crates.io-index"
1140checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
1141dependencies = [
1142 "either",
1143]
11281144
1129[[package]]1145[[package]]
1130name = "itoa"1146name = "itoa"
modifiedCargo.tomldiffbeforeafterboth
1[workspace]1[workspace]
2members = ["crates/*", "cmds/*"]2members = ["crates/*", "cmds/*"]
3resolver = "2"
34
modifiedcmds/fleet/Cargo.tomldiffbeforeafterboth
34futures = "0.3.17"34futures = "0.3.17"
35tracing-indicatif = "0.3.5"35tracing-indicatif = "0.3.5"
36indicatif = "0.17.7"36indicatif = "0.17.7"
37itertools = "0.11.0"
3738
modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
22
3use crate::command::MyCommand;3use crate::command::MyCommand;
4use crate::host::Config;4use crate::host::Config;
5use anyhow::Result;5use anyhow::{anyhow, Result};
6use clap::Parser;6use clap::Parser;
7use itertools::Itertools;
7use tokio::{task::LocalSet, time::sleep};8use tokio::{task::LocalSet, time::sleep};
8use tracing::{error, field, info, info_span, warn, Instrument};9use tracing::{error, field, info, info_span, warn, Instrument};
910
12 /// Do not continue on error13 /// Do not continue on error
13 #[clap(long)]14 #[clap(long)]
14 fail_fast: bool,15 fail_fast: bool,
16 /// Disable automatic rollback
17 #[clap(long)]
18 disable_rollback: bool,
15 /// Run builds as sudo19 /// Run builds as sudo
16 #[clap(long)]20 #[clap(long)]
17 privileged_build: bool,21 privileged_build: bool,
39 pub(crate) fn should_activate(&self) -> bool {43 pub(crate) fn should_activate(&self) -> bool {
40 matches!(self, Self::Switch | Self::Test)44 matches!(self, Self::Switch | Self::Test)
41 }45 }
46 pub(crate) fn should_schedule_rollback_run(&self) -> bool {
47 matches!(self, Self::Switch | Self::Test)
48 }
42}49}
4350
44enum PackageAction {51enum PackageAction {
103 InstallationCd,110 InstallationCd,
104}111}
112
/// One entry of a Nix profile generation list, as parsed from a line of
/// `nix-env --list-generations` output.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Generation {
    /// Numeric generation id (first column of the line).
    id: u32,
    /// Whether the line carried the `(current)` marker.
    current: bool,
    /// Date and time columns of the line, joined by a single space.
    datetime: String,
}
118async fn get_current_generation(config: &Config, host: &str) -> Result<Generation> {
119 let mut cmd = MyCommand::new("nix-env");
120 cmd.comparg("--profile", "/nix/var/nix/profiles/system")
121 .arg("--list-generations");
122 // Sudo is required due to --list-generations acquiring lock on the profile.
123 let data = config.run_string_on(&host, cmd, true).await?;
124 let generations = data
125 .split('\n')
126 .map(|e| e.trim())
127 .filter(|&l| l != "")
128 .filter_map(|g| {
129 let gen: Option<Generation> = try {
130 let mut parts = g.split_whitespace();
131 let id = parts.next()?;
132 let id: u32 = id.parse().ok()?;
133 let date = parts.next()?;
134 let time = parts.next()?;
135 let current = if let Some(current) = parts.next() {
136 if current == "(current)" {
137 Some(true)
138 } else {
139 None
140 }
141 } else {
142 Some(false)
143 };
144 let current = current?;
145 if parts.next().is_some() {
146 warn!("unexpected text after generation: {g}");
147 }
148 Generation {
149 id,
150 current,
151 datetime: format!("{date} {time}"),
152 }
153 };
154 if gen.is_none() {
155 warn!("bad generation: {g}")
156 }
157 gen
158 })
159 .collect::<Vec<_>>();
160 let current = generations
161 .into_iter()
162 .filter(|g| g.current)
163 .at_most_one()
164 .map_err(|_e| anyhow!("bad list-generations output"))?
165 .ok_or_else(|| anyhow!("failed to find generation"))?;
166 Ok(current)
167}
105168
106impl BuildSystems {169impl BuildSystems {
107 async fn build_task(self, config: Config, host: String) -> Result<()> {170 async fn build_task(self, config: Config, host: String) -> Result<()> {
155 loop {218 loop {
156 let mut nix = MyCommand::new("nix");219 let mut nix = MyCommand::new("nix");
157 nix.arg("copy")220 nix.arg("copy")
221 .arg("--substitute-on-destination")
158 .comparg("--to", format!("ssh://root@{host}"))222 .comparg("--to", format!("ssh://root@{host}"))
159 .arg(&built);223 .arg(&built);
160 match nix.run_nix().await {224 match nix.run_nix().await {
169 }233 }
170 }234 }
171 if let Some(action) = action {235 if let Some(action) = action {
236 let mut failed = false;
237 // TODO: Lockfile, to prevent concurrent system switch?
238 // TODO: If a rollback target already exists - bail, it should be removed first. A lockfile will not work if the rollback
239 // is scheduled for the next boot (default behavior). On the current boot the rollback activator will fail due to
240 // a unit name conflict in systemd-run
241 if !self.disable_rollback {
242 let _span = info_span!("preparing").entered();
243 info!("preparing for rollback");
244 let generation = get_current_generation(&config, &host).await?;
245 info!(
246 "rollback target would be {} {}",
247 generation.id, generation.datetime
248 );
249 {
250 let mut cmd = MyCommand::new("sh");
251 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));
252 if let Err(e) = config.run_on(&host, cmd, true).await {
253 error!("failed to set rollback marker: {e}");
254 failed = true;
255 }
256 }
257 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.
258 // Kicking it on manually will work best.
259 //
260 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will
261 // only allow one instance of it.
262 if action.should_schedule_rollback_run() {
263 let mut cmd = MyCommand::new("systemd-run");
264 cmd.comparg("--on-active", "3min")
265 .comparg("--unit", "rollback-watchdog-run")
266 .arg("systemctl")
267 .arg("start")
268 .arg("rollback-watchdog.service");
269 if let Err(e) = config.run_on(&host, cmd, true).await {
270 error!("failed to schedule rollback run: {e}");
271 failed = true;
272 }
273 }
274 }
172 if action.should_switch_profile() {275 if action.should_switch_profile() && !failed {
173 info!("switching generation");276 info!("switching generation");
174 let mut cmd = MyCommand::new("nix-env");277 let mut cmd = MyCommand::new("nix-env");
175 cmd.comparg("--profile", "/nix/var/nix/profiles/system")278 cmd.comparg("--profile", "/nix/var/nix/profiles/system")
176 .comparg("--set", &built);279 .comparg("--set", &built);
177 config.run_on(&host, cmd, true).await?;280 if let Err(e) = config.run_on(&host, cmd, true).await {
281 error!("failed to switch generation: {e}");
282 failed = true;
283 }
178 }284 }
179 if action.should_activate() {285 if action.should_activate() && !failed {
286 let _span = info_span!("activating").entered();
180 info!("executing activation script");287 info!("executing activation script");
181 let mut switch_script = built.clone();288 let mut switch_script = built.clone();
182 switch_script.push("bin");289 switch_script.push("bin");
183 switch_script.push("switch-to-configuration");290 switch_script.push("switch-to-configuration");
184 let mut cmd = MyCommand::new(switch_script);291 let mut cmd = MyCommand::new(switch_script);
185 cmd.arg(action.name());292 cmd.arg(action.name());
186 config.run_on(&host, cmd, true).await?;293 if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {
294 error!("failed to activate: {e}");
295 failed = true;
296 }
187 }297 }
298 if !self.disable_rollback {
299 {
300 let _span = info_span!("rollback").entered();
301 if failed {
302 info!("executing rollback");
303 let mut cmd = MyCommand::new("systemctl");
304 cmd.arg("start").arg("rollback-watchdog.service");
305 if let Err(e) = config.run_on(&host, cmd, true).await {
306 error!("failed to rollback: {e}");
307 }
308 } else {
309 info!("marking upgrade as successful");
310 let mut cmd = MyCommand::new("rm");
311 cmd.arg("-f").arg("/etc/fleet_rollback_marker");
312 if let Err(e) =
313 config.run_on(&host, cmd, true).in_current_span().await
314 {
315 error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")
316 }
317 }
318 }
319 {
320 let _span = info_span!("disarm").entered();
321 info!("disarming watchdog, just in case");
322 {
323 let mut cmd = MyCommand::new("systemctl");
324 cmd.arg("stop").arg("rollback-watchdog.timer");
325 if let Err(_e) = config.run_on(&host, cmd, true).await {
326 // It is ok, if there was no reboot.
327 }
328 }
329 if action.should_schedule_rollback_run() {
330 let mut cmd = MyCommand::new("systemctl");
331 cmd.arg("stop").arg("rollback-watchdog-run.timer");
332 if let Err(e) = config.run_on(&host, cmd, true).await {
333 error!("failed to disarm rollback run: {e}");
334 }
335 }
336 }
337 }
188 }338 }
189 }339 }
190 Action::Package(PackageAction::SdImage) => {340 Action::Package(PackageAction::SdImage) => {
modifiedcmds/fleet/src/command.rsdiffbeforeafterboth
143143
144 pub async fn run_nix_string(self) -> Result<String> {144 pub async fn run_nix_string(self) -> Result<String> {
145 let str = self.clone().into_string();145 let str = self.clone().into_string();
146 let cmd = self.into_command();146 let mut cmd = self.into_command();
147 cmd.arg("--log-format").arg("internal-json");
147 run_nix_inner_stdout(str, cmd, &mut NixHandler::default()).await148 run_nix_inner_stdout(str, cmd, &mut NixHandler::default()).await
148 }149 }
149 pub async fn run_nix(self) -> Result<()> {150 pub async fn run_nix(self) -> Result<()> {
150 let str = self.clone().into_string();151 let str = self.clone().into_string();
151 let mut cmd = self.into_command();152 let mut cmd = self.into_command();
153 cmd.arg("--log-format").arg("internal-json");
152 cmd.stdout(Stdio::inherit());154 cmd.stdout(Stdio::inherit());
153 run_nix_inner(str, cmd, &mut NixHandler::default()).await155 run_nix_inner(str, cmd, &mut NixHandler::default()).await
154 }156 }
410 handler: &mut dyn Handler,412 handler: &mut dyn Handler,
411) -> Result<Option<String>> {413) -> Result<Option<String>> {
412 info!("running {str}");414 info!("running {str}");
413 cmd.arg("--log-format").arg("internal-json");
414 cmd.stderr(Stdio::piped());415 cmd.stderr(Stdio::piped());
415 cmd.stdout(Stdio::piped());416 cmd.stdout(Stdio::piped());
416 let mut child = cmd.spawn()?;417 let mut child = cmd.spawn()?;
modifiedcmds/fleet/src/main.rsdiffbeforeafterboth
1#![feature(try_blocks)]
2
1pub mod cmds;3pub mod cmds;
2pub mod command;4pub mod command;
6mod fleetdata;8mod fleetdata;
79
8use std::ffi::OsString;10use std::ffi::OsString;
9use std::io;
10use std::time::Duration;11use std::time::Duration;
1112
12use anyhow::{anyhow, bail, Result};13use anyhow::{bail, Result};
13use clap::Parser;14use clap::Parser;
1415
15use cmds::{build_systems::BuildSystems, info::Info, secrets::Secrets};16use cmds::{build_systems::BuildSystems, info::Info, secrets::Secrets};
16use host::{Config, FleetOpts};17use host::{Config, FleetOpts};
17use indicatif::{ProgressState, ProgressStyle};18use indicatif::{ProgressState, ProgressStyle};
18use tokio::fs;
19use tokio::process::Command;19use tokio::process::Command;
20use tracing::{info, metadata::LevelFilter};20use tracing::{info, metadata::LevelFilter};
21use tracing_indicatif::IndicatifLayer;21use tracing_indicatif::IndicatifLayer;
80 };80 };
81 Ok(())81 Ok(())
82}82}
83fn elapsed_subsec(state: &ProgressState, writer: &mut dyn std::fmt::Write) {
84 let _ = writer.write_str(&format!("{:?}", state.elapsed()));
85}
8683
87#[tokio::main]84#[tokio::main]
88async fn main() -> Result<()> {85async fn main() -> Result<()> {
modifiedcmds/install-secrets/Cargo.tomldiffbeforeafterboth
9env_logger = "0.10.0"9env_logger = "0.10.0"
10log = "0.4.14"10log = "0.4.14"
11nix = "0.26.1"11nix = "0.26.1"
12serde = "1.0.130"12serde = { version = "1.0.130", features = ["derive"] }
13serde_json = "1.0.89"13serde_json = "1.0.89"
14clap = { version = "4.0.29", features = [14clap = { version = "4.0.29", features = [
15 "derive",15 "derive",
modifiednixos/modules/module-list.nixdiffbeforeafterboth
2 ../fleetPkgs.nix2 ../fleetPkgs.nix
3 ../meta.nix3 ../meta.nix
4 ../secrets.nix4 ../secrets.nix
5 ../rollback.nix
5]6]
67
addednixos/rollback.nixdiffbeforeafterboth

no changes

modifiedpkgs/fleet-install-secrets.nixdiffbeforeafterboth
6 name = "${pname}-${version}";6 name = "${pname}-${version}";
77
8 src = ../.;8 src = ../.;
9 cargoBuildFlags = "-p ${pname}";9 buildAndTestSubdir = "cmds/install-secrets";
10 cargoLock = {10 cargoLock = {
11 lockFile = ../Cargo.lock;11 lockFile = ../Cargo.lock;
12 outputHashes = {12 outputHashes = {