From c0c9b96f77be19ac6a6603f9502d58b221edc6db Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Fri, 05 Jan 2024 19:40:05 +0000 Subject: [PATCH] refactor: split build-systems and deploy commands --- --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,5 @@ [workspace.dependencies] nixlike = { path = "./crates/nixlike" } better-command = { path = "./crates/better-command" } +uuid = { version = "1.3.3", features = ["v4"] } +tokio = { version = "1.33.0", features = ["fs", "rt", "macros", "sync", "time", "rt-multi-thread"] } --- a/cmds/fleet/Cargo.toml +++ b/cmds/fleet/Cargo.toml @@ -8,6 +8,7 @@ [dependencies] nixlike.workspace = true better-command.workspace = true +tokio.workspace = true anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" @@ -27,7 +28,6 @@ "wrap_help", "unicode", ] } -tokio = { version = "1.33.0", features = ["full"] } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] } tokio-util = { version = "0.7.10", features = ["codec"] } --- a/cmds/fleet/src/better_nix_eval.rs +++ b/cmds/fleet/src/better_nix_eval.rs @@ -428,6 +428,7 @@ self.used_fields.extend(e.used_fields); } + #[allow(dead_code)] pub fn session(&self) -> NixSession { let mut session = None; for ele in &self.used_fields { @@ -444,6 +445,7 @@ } session.expect("expr without fields used") } + #[allow(dead_code)] pub fn index_attr(&mut self, s: &str) { let escaped = nixlike::serialize(s).expect("string"); self.out.push('.'); @@ -559,7 +561,9 @@ pub enum Index { Var(String), String(String), + #[allow(dead_code)] Apply(String), + #[allow(dead_code)] Expr(NixExprBuilder), ExprApply(NixExprBuilder), Pipe(NixExprBuilder), @@ -576,6 +580,7 @@ pub fn attr(v: impl AsRef) -> Self { Self::String(v.as_ref().to_owned()) } + #[allow(dead_code)] pub fn apply(v: impl Serialize) -> Self { let serialized = nixlike::serialize(v).expect("invalid value for apply"); Self::Apply(serialized.trim_end().to_owned()) @@ -749,6 +754,7 @@ .await .with_context(|| context("as_json", self.0.full_path.as_deref(), &query)) } + #[allow(dead_code)] pub async fn has_field(&self, name: &str) -> Result { let id = self.0.value.expect("can't list root fields"); let key = nixlike::escape_string(name); @@ -786,6 +792,7 @@ .await .with_context(|| context("type_of", self.0.full_path.as_deref(), &query)) } + #[allow(dead_code)] pub async fn import(&self) -> Result { let import = Self::new(self.0.session.clone(), "import").await?; Ok(nix_go!(self | import)) --- a/cmds/fleet/src/cmds/build_systems.rs +++ b/cmds/fleet/src/cmds/build_systems.rs @@ -6,34 +6,40 @@ use crate::host::{Config, ConfigHost}; use crate::nix_go; use anyhow::{anyhow, Result}; -use clap::Parser; +use clap::{Parser, ValueEnum}; use itertools::Itertools as _; use tokio::{task::LocalSet, time::sleep}; use tracing::{error, field, info, info_span, warn, Instrument}; -#[derive(Parser, Clone)] -pub struct BuildSystems { +#[derive(Parser)] +pub struct Deploy { /// Disable automatic rollback #[clap(long)] disable_rollback: bool, - #[clap(subcommand)] - subcommand: Subcommand, + action: DeployAction, } -enum UploadAction { +#[derive(ValueEnum, Clone, Copy)] +enum DeployAction { + /// Upload derivation, but do not execute the update. + Upload, + /// Upload and execute the activation script, old version will be used after reboot. Test, + /// Upload and set as current system profile, but do not execute activation script. Boot, + /// Upload, set current profile, and execute activation script. Switch, } -impl UploadAction { - fn name(&self) -> &'static str { + +impl DeployAction { + pub(crate) fn name(&self) -> Option<&'static str> { match self { - UploadAction::Test => "test", - UploadAction::Boot => "boot", - UploadAction::Switch => "switch", + DeployAction::Upload => None, + DeployAction::Test => Some("test"), + DeployAction::Boot => Some("boot"), + DeployAction::Switch => Some("switch"), } } - pub(crate) fn should_switch_profile(&self) -> bool { matches!(self, Self::Switch | Self::Boot) } @@ -42,69 +48,15 @@ } pub(crate) fn should_schedule_rollback_run(&self) -> bool { matches!(self, Self::Switch | Self::Test) - } -} - -enum PackageAction { - SdImage, - InstallationCd, -} -impl PackageAction { - fn build_attr(&self) -> String { - match self { - PackageAction::SdImage => "sdImage".to_owned(), - PackageAction::InstallationCd => "isoImage".to_owned(), - } - } -} - -enum Action { - Upload { action: Option }, - Package(PackageAction), -} -impl Action { - fn build_attr(&self) -> String { - match self { - Action::Upload { .. } => "toplevel".to_owned(), - Action::Package(p) => p.build_attr(), - } } } -impl From for Action { - fn from(s: Subcommand) -> Self { - match s { - Subcommand::Upload => Self::Upload { action: None }, - Subcommand::Test => Self::Upload { - action: Some(UploadAction::Test), - }, - Subcommand::Boot => Self::Upload { - action: Some(UploadAction::Boot), - }, - Subcommand::Switch => Self::Upload { - action: Some(UploadAction::Switch), - }, - Subcommand::SdImage => Self::Package(PackageAction::SdImage), - Subcommand::InstallationCd => Self::Package(PackageAction::InstallationCd), - } - } -} - #[derive(Parser, Clone)] -enum Subcommand { - /// Upload, but do not switch - Upload, - /// Upload + switch to built system until reboot - Test, - /// Upload + switch to built system after reboot - Boot, - /// Upload + test + boot - Switch, - - /// Build SD .img image - SdImage, - /// Build an installation cd ISO image - InstallationCd, +pub struct BuildSystems { + /// Attribute to build. Systems are deployed from "toplevel" attr, well-known used attributes + /// are "sdImage"/"isoImage", and your configuration may include any other build attributes. + #[clap(long, default_value = "toplevel")] + build_attr: String, } struct Generation { @@ -163,11 +115,11 @@ Ok(current) } -async fn execute_upload( - build: &BuildSystems, - action: UploadAction, +async fn deploy_task( + action: DeployAction, host: &ConfigHost, built: PathBuf, + disable_rollback: bool, ) -> Result<()> { let mut failed = false; // TODO: Lockfile, to prevent concurrent system switch? @@ -175,7 +127,7 @@ // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to // unit name conflict in systemd-run // This code is tied to rollback.nix - if !build.disable_rollback { + if !disable_rollback { let _span = info_span!("preparing").entered(); info!("preparing for rollback"); let generation = get_current_generation(host).await?; @@ -235,13 +187,13 @@ switch_script.push("bin"); switch_script.push("switch-to-configuration"); let mut cmd = host.cmd(switch_script).in_current_span().await?; - cmd.arg(action.name()); + cmd.arg(action.name().expect("upload.should_activate == false")); if let Err(e) = cmd.sudo().run().in_current_span().await { error!("failed to activate: {e}"); failed = true; } } - if !build.disable_rollback { + if !disable_rollback { if failed { info!("executing rollback"); if let Err(e) = host @@ -280,97 +232,45 @@ Ok(()) } -impl BuildSystems { - async fn build_task(self, config: Config, host: String) -> Result<()> { - info!("building"); - let host = config.host(&host).await?; - let action = Action::from(self.subcommand.clone()); - let fleet_config = &config.config_field; - let drv = nix_go!( - fleet_config.hosts[{ &host.name }].nixosSystem.config.system.build[{ action.build_attr() }] - ); - let outputs = drv.build().await.map_err(|e| { - if action.build_attr() == "sdImage" { +async fn build_task(config: Config, host: String, build_attr: &str) -> Result { + info!("building"); + let host = config.host(&host).await?; + // let action = Action::from(self.subcommand.clone()); + let fleet_config = &config.config_field; + let drv = nix_go!( + fleet_config.hosts[{ &host.name }] + .nixosSystem + .config + .system + .build[{ build_attr }] + ); + let outputs = drv.build().await.map_err(|e| { + if build_attr == "sdImage" { info!("sd-image build failed"); info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-[-installer].nix (For installer, you may want to check config)"); } e })?; - let out_output = outputs - .get("out") - .ok_or_else(|| anyhow!("system build should produce \"out\" output"))?; - - match action { - Action::Upload { action } => { - if !config.is_local(&host.name) { - info!("uploading system closure"); - { - // TODO: Move to remote_derivation method. - // Alternatively, nix store make-content-addressed can be used, - // at least for the first deployment, to provide trusted store key. - // - // It is much slower, yet doesn't require root on the deployer machine. - let mut sign = MyCommand::new("nix"); - // Private key for host machine is registered in nix-sign.nix - sign.arg("store") - .arg("sign") - .comparg("--key-file", "/etc/nix/private-key") - .arg("-r") - .arg(out_output); - if let Err(e) = sign.sudo().run_nix().await { - warn!("Failed to sign store paths: {e}"); - }; - } - let mut tries = 0; - loop { - match host.remote_derivation(out_output).await { - Ok(remote) => { - assert!(&remote == out_output, "CA derivations aren't implemented"); - break; - } - Err(e) if tries < 3 => { - tries += 1; - warn!("Copy failure ({}/3): {}", tries, e); - sleep(Duration::from_millis(5000)).await; - } - Err(e) => return Err(e), - } - } - } - if let Some(action) = action { - execute_upload(&self, action, &host, out_output.clone()).await? - } - } - Action::Package(PackageAction::SdImage) => { - let mut out = current_dir()?; - out.push(format!("sd-image-{}", host.name)); - - info!("linking sd image to {:?}", out); - symlink(out_output, out)?; - } - Action::Package(PackageAction::InstallationCd) => { - let mut out = current_dir()?; - out.push(format!("installation-cd-{}", host.name)); + let out_output = outputs + .get("out") + .ok_or_else(|| anyhow!("system build should produce \"out\" output"))?; - info!("linking iso image to {:?}", out); - symlink(out_output, out)?; - } - }; - Ok(()) - } + Ok(out_output.clone()) +} +impl BuildSystems { pub async fn run(self, config: &Config) -> Result<()> { let hosts = config.list_hosts().await?; let set = LocalSet::new(); - let this = &self; + let build_attr = self.build_attr.clone(); for host in hosts.into_iter() { if config.should_skip(&host.name) { continue; } let config = config.clone(); - let this = this.clone(); - let span = info_span!("deployment", host = field::display(&host.name)); + let span = info_span!("build", host = field::display(&host.name)); let hostname = host.name; + let build_attr = build_attr.clone(); // FIXME: Since the introduction of better-nix-eval, // due to single repl used for builds, hosts are waiting for each other to build, // instead of building concurrently. @@ -384,11 +284,94 @@ // multiple hosts. set.spawn_local( (async move { - match this.build_task(config, hostname).await { - Ok(_) => {} + let built = match build_task(config, hostname.clone(), &build_attr).await { + Ok(path) => path, + Err(e) => { + error!("failed to deploy host: {}", e); + return; + } + }; + // TODO: Handle error + let mut out = current_dir().expect("cwd exists"); + out.push(format!("built-{}", hostname)); + + info!("linking iso image to {:?}", out); + if let Err(e) = symlink(built, out) { + error!("failed to symlink: {e}") + } + }) + .instrument(span), + ); + } + set.await; + Ok(()) + } +} + +impl Deploy { + pub async fn run(self, config: &Config) -> Result<()> { + let hosts = config.list_hosts().await?; + let set = LocalSet::new(); + for host in hosts.into_iter() { + if config.should_skip(&host.name) { + continue; + } + let config = config.clone(); + let span = info_span!("deploy", host = field::display(&host.name)); + let hostname = host.name.clone(); + // FIXME: Fix repl concurrency (see build-systems) + set.spawn_local( + (async move { + let built = match build_task(config.clone(), hostname.clone(), "toplevel").await + { + Ok(path) => path, Err(e) => { - error!("failed to deploy host: {}", e) + error!("failed to deploy host: {}", e); + return; } + }; + if !config.is_local(&hostname) { + info!("uploading system closure"); + { + // TODO: Move to remote_derivation method. + // Alternatively, nix store make-content-addressed can be used, + // at least for the first deployment, to provide trusted store key. + // + // It is much slower, yet doesn't require root on the deployer machine. + let mut sign = MyCommand::new("nix"); + // Private key for host machine is registered in nix-sign.nix + sign.arg("store") + .arg("sign") + .comparg("--key-file", "/etc/nix/private-key") + .arg("-r") + .arg(&built); + if let Err(e) = sign.sudo().run_nix().await { + warn!("Failed to sign store paths: {e}"); + }; + } + let mut tries = 0; + loop { + match host.remote_derivation(&built).await { + Ok(remote) => { + assert!(remote == built, "CA derivations aren't implemented"); + break; + } + Err(e) if tries < 3 => { + tries += 1; + warn!("copy failure ({}/3): {}", tries, e); + sleep(Duration::from_millis(5000)).await; + } + Err(e) => { + error!("upload failed: {e}"); + return; + } + } + } + } + if let Err(e) = + deploy_task(self.action, &host, built, self.disable_rollback).await + { + error!("activation failed: {e}"); } }) .instrument(span), --- a/cmds/fleet/src/cmds/secrets/mod.rs +++ b/cmds/fleet/src/cmds/secrets/mod.rs @@ -7,8 +7,6 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; use chrono::{DateTime, Utc}; use clap::Parser; -use futures::StreamExt; -use itertools::Itertools; use owo_colors::OwoColorize; use serde::Deserialize; use std::{ @@ -570,7 +568,7 @@ config.replace_shared( name.to_owned(), update_owner_set( - &name, + name, config, data, secret, --- a/cmds/fleet/src/host.rs +++ b/cmds/fleet/src/host.rs @@ -14,7 +14,6 @@ use openssh::SessionBuilder; use serde::de::DeserializeOwned; use tempfile::NamedTempFile; -use tracing::instrument; use crate::{ better_nix_eval::{Field, NixSessionPool}, @@ -90,6 +89,7 @@ cmd.arg(path); cmd.run_string().await } + #[allow(dead_code)] pub async fn read_file_json(&self, path: impl AsRef) -> Result { let text = self.read_file_text(path).await?; Ok(serde_json::from_str(&text)?) --- a/cmds/fleet/src/main.rs +++ b/cmds/fleet/src/main.rs @@ -12,14 +12,17 @@ mod fleetdata; use std::ffi::OsString; -use std::io::{stderr, stdout, Write}; use std::process::exit; use std::time::Duration; use anyhow::{bail, Result}; use clap::Parser; -use cmds::{build_systems::BuildSystems, info::Info, secrets::Secret}; +use cmds::{ + build_systems::{BuildSystems, Deploy}, + info::Info, + secrets::Secret, +}; use futures::future::LocalBoxFuture; use futures::stream::FuturesUnordered; use futures::TryStreamExt; @@ -73,6 +76,8 @@ enum Opts { /// Prepare systems for deployments BuildSystems(BuildSystems), + + Deploy(Deploy), /// Secret management #[clap(subcommand)] Secret(Secret), @@ -94,6 +99,7 @@ async fn run_command(config: &Config, command: Opts) -> Result<()> { match command { Opts::BuildSystems(c) => c.run(config).await?, + Opts::Deploy(d) => d.run(config).await?, Opts::Secret(s) => s.run(config).await?, Opts::Info(i) => i.run(config).await?, Opts::Prefetch(p) => p.run(config).await?, --- a/crates/better-command/src/handler.rs +++ b/crates/better-command/src/handler.rs @@ -165,7 +165,7 @@ drv = pkg; } } - // info!(target: "nix","copying {} {} -> {}", drv, from, to); + info!(target: "nix","copying {} {} -> {}", drv, from, to); let span = info_span!("copy", from, to, drv); span.pb_start(); self.spans.insert(id, span); --- a/flake.lock +++ b/flake.lock @@ -38,11 +38,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1703974965, - "narHash": "sha256-dvZjLuAcLnv25bqStTL2ZICC5YSs8aynF5amRM+I6UM=", + "lastModified": 1704409229, + "narHash": "sha256-Vc41cRJ3trOnocovLe0zZE35pK5Lfuo/zHk0xx3CNDY=", "owner": "nixos", "repo": "nixpkgs", - "rev": "9f434bd436e2bb5615827469ed651e30c26daada", + "rev": "786f788914f2a6e94cedf361541894e972b8fd23", "type": "github" }, "original": { @@ -67,11 +67,11 @@ ] }, "locked": { - "lastModified": 1703902408, - "narHash": "sha256-qXdWvu+tlgNjeoz8yQMRKSom6QyRROfgpmeOhwbujqw=", + "lastModified": 1704075545, + "narHash": "sha256-L3zgOuVKhPjKsVLc3yTm2YJ6+BATyZBury7wnhyc8QU=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "319f57cd2c34348c55970a4bf2b35afe82088681", + "rev": "a0df72e106322b67e9c6e591fe870380bd0da0d5", "type": "github" }, "original": { --- a/flake.nix +++ b/flake.nix @@ -29,7 +29,7 @@ llvmPkgs = pkgs.buildPackages.llvmPackages_11; rust = (pkgs.rustChannelOf { - date = "2023-12-29"; + date = "2024-01-01"; channel = "nightly"; }) .default -- gitstuff