difftreelog
refactor split deployment function
in: trunk
1 file changed
cmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth1use std::{env::current_dir, time::Duration};23use crate::command::MyCommand;4use crate::host::Config;5use anyhow::{anyhow, Result};6use clap::Parser;7use itertools::Itertools;8use tokio::{task::LocalSet, time::sleep};9use tracing::{error, field, info, info_span, warn, Instrument};1011#[derive(Parser, Clone)]12pub struct BuildSystems {13 /// Do not continue on error14 #[clap(long)]15 fail_fast: bool,16 /// Disable automatic rollback17 #[clap(long)]18 disable_rollback: bool,19 /// Run builds as sudo20 #[clap(long)]21 privileged_build: bool,22 #[clap(subcommand)]23 subcommand: Subcommand,24}2526enum UploadAction {27 Test,28 Boot,29 Switch,30}31impl UploadAction {32 fn name(&self) -> &'static str {33 match self {34 UploadAction::Test => "test",35 UploadAction::Boot => "boot",36 UploadAction::Switch => "switch",37 }38 }3940 pub(crate) fn should_switch_profile(&self) -> bool {41 matches!(self, Self::Switch | Self::Boot)42 }43 pub(crate) fn should_activate(&self) -> bool {44 matches!(self, Self::Switch | Self::Test)45 }46 pub(crate) fn should_schedule_rollback_run(&self) -> bool {47 matches!(self, Self::Switch | Self::Test)48 }49}5051enum PackageAction {52 SdImage,53 InstallationCd,54}55impl PackageAction {56 fn build_attr(&self) -> String {57 match self {58 PackageAction::SdImage => "sdImage".to_owned(),59 PackageAction::InstallationCd => "installationCd".to_owned(),60 }61 }62}6364enum Action {65 Upload { action: Option<UploadAction> },66 Package(PackageAction),67}68impl Action {69 fn build_attr(&self) -> String {70 match self {71 Action::Upload { .. } => "toplevel".to_owned(),72 Action::Package(p) => p.build_attr(),73 }74 }75}7677impl From<Subcommand> for Action {78 fn from(s: Subcommand) -> Self {79 match s {80 Subcommand::Upload => Self::Upload { action: None },81 Subcommand::Test => Self::Upload {82 action: Some(UploadAction::Test),83 },84 Subcommand::Boot => Self::Upload {85 action: Some(UploadAction::Boot),86 },87 Subcommand::Switch => Self::Upload {88 action: Some(UploadAction::Switch),89 },90 Subcommand::SdImage => Self::Package(PackageAction::SdImage),91 Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd),92 }93 }94}9596#[derive(Parser, Clone)]97enum Subcommand {98 /// Upload, but do not switch99 Upload,100 /// Upload + switch to built system until reboot101 Test,102 /// Upload + switch to built system after reboot103 Boot,104 /// Upload + test + boot105 Switch,106107 /// Build SD .img image108 SdImage,109 /// Build an installation cd ISO image110 InstallationCd,111}112113struct Generation {114 id: u32,115 current: bool,116 datetime: String,117}118async fn get_current_generation(config: &Config, host: &str) -> Result<Generation> {119 let mut cmd = MyCommand::new("nix-env");120 cmd.comparg("--profile", "/nix/var/nix/profiles/system")121 .arg("--list-generations");122 // Sudo is required due to --list-generations acquiring lock on the profile.123 let data = config.run_string_on(&host, cmd, true).await?;124 let generations = data125 .split('\n')126 .map(|e| e.trim())127 .filter(|&l| l != "")128 .filter_map(|g| {129 let gen: Option<Generation> = try {130 let mut parts = g.split_whitespace();131 let id = parts.next()?;132 let id: u32 = id.parse().ok()?;133 let date = parts.next()?;134 let time = parts.next()?;135 let current = if let Some(current) = parts.next() {136 if current == "(current)" {137 Some(true)138 } else {139 None140 }141 } else {142 Some(false)143 };144 let current = current?;145 if parts.next().is_some() {146 warn!("unexpected text after generation: {g}");147 }148 Generation {149 id,150 current,151 datetime: format!("{date} {time}"),152 }153 };154 if gen.is_none() {155 warn!("bad generation: {g}")156 }157 gen158 })159 .collect::<Vec<_>>();160 let current = generations161 .into_iter()162 .filter(|g| g.current)163 .at_most_one()164 .map_err(|_e| anyhow!("bad list-generations output"))?165 .ok_or_else(|| anyhow!("failed to find generation"))?;166 Ok(current)167}168169impl BuildSystems {170 async fn build_task(self, config: Config, host: String) -> Result<()> {171 info!("building");172 let action = Action::from(self.subcommand.clone());173 let built = {174 let dir = tempfile::tempdir()?;175 dir.path().to_owned()176 };177178 let mut nix_build = MyCommand::new("nix");179 nix_build180 .args([181 "build",182 "--impure",183 "--json",184 // "--show-trace",185 "--no-link",186 "--option",187 "log-lines",188 "200",189 ])190 .comparg("--out-link", &built)191 .arg(192 config.configuration_attr_name(&format!(193 "buildSystems.{}.{host}",194 action.build_attr()195 )),196 )197 .args(&config.nix_args);198199 if self.privileged_build {200 nix_build = nix_build.sudo();201 }202203 nix_build.run_nix().await.map_err(|e| {204 if action.build_attr() == "sdImage" {205 info!("sd-image build failed");206 info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");207 info!("This module was automatically imported before, but was removed for better customization")208 }209 e210 })?;211 let built = std::fs::canonicalize(built)?;212213 match action {214 Action::Upload { action } => {215 if !config.is_local(&host) {216 info!("uploading system closure");217 let mut tries = 0;218 loop {219 let mut nix = MyCommand::new("nix");220 nix.arg("copy")221 .arg("--substitute-on-destination")222 .comparg("--to", format!("ssh://root@{host}"))223 .arg(&built);224 match nix.run_nix().await {225 Ok(()) => break,226 Err(e) if tries < 3 => {227 tries += 1;228 warn!("Copy failure ({}/3): {}", tries, e);229 sleep(Duration::from_millis(5000)).await;230 }231 Err(e) => return Err(e),232 }233 }234 }235 if let Some(action) = action {236 let mut failed = false;237 // TODO: Lockfile, to prevent concurrent system switch?238 // TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback239 // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to240 // unit name conflict in systemd-run241 if !self.disable_rollback {242 let _span = info_span!("preparing").entered();243 info!("preparing for rollback");244 let generation = get_current_generation(&config, &host).await?;245 info!(246 "rollback target would be {} {}",247 generation.id, generation.datetime248 );249 {250 let mut cmd = MyCommand::new("sh");251 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));252 if let Err(e) = config.run_on(&host, cmd, true).await {253 error!("failed to set rollback marker: {e}");254 failed = true;255 }256 }257 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.258 // Kicking it on manually will work best.259 //260 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will261 // only allow one instance of it.262 if action.should_schedule_rollback_run() {263 let mut cmd = MyCommand::new("systemd-run");264 cmd.comparg("--on-active", "3min")265 .comparg("--unit", "rollback-watchdog-run")266 .arg("systemctl")267 .arg("start")268 .arg("rollback-watchdog.service");269 if let Err(e) = config.run_on(&host, cmd, true).await {270 error!("failed to schedule rollback run: {e}");271 failed = true;272 }273 }274 }275 if action.should_switch_profile() && !failed {276 info!("switching generation");277 let mut cmd = MyCommand::new("nix-env");278 cmd.comparg("--profile", "/nix/var/nix/profiles/system")279 .comparg("--set", &built);280 if let Err(e) = config.run_on(&host, cmd, true).await {281 error!("failed to switch generation: {e}");282 failed = true;283 }284 }285 if action.should_activate() && !failed {286 let _span = info_span!("activating").entered();287 info!("executing activation script");288 let mut switch_script = built.clone();289 switch_script.push("bin");290 switch_script.push("switch-to-configuration");291 let mut cmd = MyCommand::new(switch_script);292 cmd.arg(action.name());293 if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {294 error!("failed to activate: {e}");295 failed = true;296 }297 }298 if !self.disable_rollback {299 {300 let _span = info_span!("rollback").entered();301 if failed {302 info!("executing rollback");303 let mut cmd = MyCommand::new("systemctl");304 cmd.arg("start").arg("rollback-watchdog.service");305 if let Err(e) = config.run_on(&host, cmd, true).await {306 error!("failed to rollback: {e}");307 }308 } else {309 info!("marking upgrade as successful");310 let mut cmd = MyCommand::new("rm");311 cmd.arg("-f").arg("/etc/fleet_rollback_marker");312 if let Err(e) =313 config.run_on(&host, cmd, true).in_current_span().await314 {315 error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")316 }317 }318 }319 {320 let _span = info_span!("disarm").entered();321 info!("disarming watchdog, just in case");322 {323 let mut cmd = MyCommand::new("systemctl");324 cmd.arg("stop").arg("rollback-watchdog.timer");325 if let Err(_e) = config.run_on(&host, cmd, true).await {326 // It is ok, if there was no reboot.327 }328 }329 if action.should_schedule_rollback_run() {330 let mut cmd = MyCommand::new("systemctl");331 cmd.arg("stop").arg("rollback-watchdog-run.timer");332 if let Err(e) = config.run_on(&host, cmd, true).await {333 error!("failed to disarm rollback run: {e}");334 }335 }336 }337 }338 }339 }340 Action::Package(PackageAction::SdImage) => {341 let mut out = current_dir()?;342 out.push(format!("sd-image-{}", host));343344 info!("building sd image to {:?}", out);345 let mut nix_build = MyCommand::new("nix");346 nix_build347 .args(["build", "--impure", "--no-link"])348 .comparg("--out-link", &out)349 .arg(config.configuration_attr_name(&format!("buildSystems.sdImage.{}", host,)))350 .args(&config.nix_args);351 if !self.fail_fast {352 nix_build.arg("--keep-going");353 }354 if self.privileged_build {355 nix_build = nix_build.sudo();356 }357358 nix_build.run_nix().await?;359 }360 Action::Package(PackageAction::InstallationCd) => {361 let mut out = current_dir()?;362 out.push(format!("installation-cd-{}", host));363364 info!("building sd image to {:?}", out);365 let mut nix_build = MyCommand::new("nix");366 nix_build367 .args(["build", "--impure", "--no-link"])368 .comparg("--out-link", &out)369 .arg(370 config.configuration_attr_name(&format!(371 "buildSystems.installationCd.{}",372 host,373 )),374 )375 .args(&config.nix_args);376 if !self.fail_fast {377 nix_build.arg("--keep-going");378 }379 if self.privileged_build {380 nix_build = nix_build.sudo();381 }382383 nix_build.run_nix().await?;384 }385 };386 Ok(())387 }388389 pub async fn run(self, config: &Config) -> Result<()> {390 let hosts = config.list_hosts().await?;391 let set = LocalSet::new();392 let this = &self;393 for host in hosts.iter() {394 if config.should_skip(host) {395 continue;396 }397 let config = config.clone();398 let host = host.clone();399 let this = this.clone();400 let span = info_span!("deployment", host = field::display(&host));401 set.spawn_local(402 (async move {403 match this.build_task(config, host).await {404 Ok(_) => {}405 Err(e) => {406 error!("failed to deploy host: {}", e)407 }408 }409 })410 .instrument(span),411 );412 }413 set.await;414 Ok(())415 }416}