difftreelog
feat ability to select specialisation to activate
in: trunk
6 files changed
Cargo.lockdiffbeforeafterboth--- a/Cargo.lock
+++ b/Cargo.lock
@@ -784,7 +784,7 @@
"itertools",
"nix-eval",
"nixlike",
- "once_cell",
+ "nom",
"openssh",
"owo-colors",
"peg",
cmds/fleet/Cargo.tomldiffbeforeafterboth--- a/cmds/fleet/Cargo.toml
+++ b/cmds/fleet/Cargo.toml
@@ -19,7 +19,6 @@
serde_json.workspace = true
tempfile.workspace = true
time = { version = "0.3", features = ["serde"] }
-once_cell = "1.19"
hostname = "0.4.0"
age-core = "0.10"
peg = "0.8"
@@ -45,6 +44,7 @@
human-repr = { version = "1.1", optional = true }
indicatif = { version = "0.17", optional = true }
nix-eval.workspace = true
+nom = "7.1.3"
[features]
# Not quite stable
cmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth1use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};23use anyhow::{anyhow, Result};4use clap::{Parser, ValueEnum};5use itertools::Itertools as _;6use nix_eval::nix_go;7use tokio::{task::LocalSet, time::sleep};8use tracing::{error, field, info, info_span, warn, Instrument};910use crate::{11 command::MyCommand,12 host::{Config, ConfigHost},13};1415#[derive(Parser)]16pub struct Deploy {17 /// Disable automatic rollback18 #[clap(long)]19 disable_rollback: bool,20 /// Action to execute after system is built21 action: DeployAction,22}2324#[derive(ValueEnum, Clone, Copy)]25enum DeployAction {26 /// Upload derivation, but do not execute the update.27 Upload,28 /// Upload and execute the activation script, old version will be used after reboot.29 Test,30 /// Upload and set as current system profile, but do not execute activation script.31 Boot,32 /// Upload, set current profile, and execute activation script.33 Switch,34}3536impl DeployAction {37 pub(crate) fn name(&self) -> Option<&'static str> {38 match self {39 DeployAction::Upload => None,40 DeployAction::Test => Some("test"),41 DeployAction::Boot => Some("boot"),42 DeployAction::Switch => Some("switch"),43 }44 }45 pub(crate) fn should_switch_profile(&self) -> bool {46 matches!(self, Self::Switch | Self::Boot)47 }48 pub(crate) fn should_activate(&self) -> bool {49 matches!(self, Self::Switch | Self::Test)50 }51 pub(crate) fn should_create_rollback_marker(&self) -> bool {52 // Upload does nothing on the target machine, other than uploading the closure.53 // In boot case we want to have rollback marker prepared, so that the system may rollback itself on the next boot.54 !matches!(self, Self::Upload)55 }56 pub(crate) fn should_schedule_rollback_run(&self) -> bool {57 matches!(self, Self::Switch | Self::Test)58 }59}6061#[derive(Parser, Clone)]62pub struct BuildSystems {63 /// Attribute to build. 
Systems are deployed from "toplevel" attr, well-known used attributes64 /// are "sdImage"/"isoImage", and your configuration may include any other build attributes.65 #[clap(long, default_value = "toplevel")]66 build_attr: String,67}6869struct Generation {70 id: u32,71 current: bool,72 datetime: String,73}74async fn get_current_generation(host: &ConfigHost) -> Result<Generation> {75 let mut cmd = host.cmd("nix-env").await?;76 cmd.comparg("--profile", "/nix/var/nix/profiles/system")77 .arg("--list-generations");78 // Sudo is required due to --list-generations acquiring lock on the profile.79 let data = cmd.sudo().run_string().await?;80 let generations = data81 .split('\n')82 .map(|e| e.trim())83 .filter(|&l| !l.is_empty())84 .filter_map(|g| {85 let gen: Option<Generation> = try {86 let mut parts = g.split_whitespace();87 let id = parts.next()?;88 let id: u32 = id.parse().ok()?;89 let date = parts.next()?;90 let time = parts.next()?;91 let current = if let Some(current) = parts.next() {92 if current == "(current)" {93 Some(true)94 } else {95 None96 }97 } else {98 Some(false)99 };100 let current = current?;101 if parts.next().is_some() {102 warn!("unexpected text after generation: {g}");103 }104 Generation {105 id,106 current,107 datetime: format!("{date} {time}"),108 }109 };110 if gen.is_none() {111 warn!("bad generation: {g}")112 }113 gen114 })115 .collect::<Vec<_>>();116 let current = generations117 .into_iter()118 .filter(|g| g.current)119 .at_most_one()120 .map_err(|_e| anyhow!("bad list-generations output"))?121 .ok_or_else(|| anyhow!("failed to find generation"))?;122 Ok(current)123}124125async fn deploy_task(126 action: DeployAction,127 host: &ConfigHost,128 built: PathBuf,129 disable_rollback: bool,130) -> Result<()> {131 let mut failed = false;132 // TODO: Lockfile, to prevent concurrent system switch?133 // TODO: If rollback target exists - bail, it should be removed. 
Lockfile will not work in case if rollback134 // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to135 // unit name conflict in systemd-run136 // This code is tied to rollback.nix137 if !disable_rollback && action.should_create_rollback_marker() {138 let _span = info_span!("preparing").entered();139 info!("preparing for rollback");140 let generation = get_current_generation(host).await?;141 info!(142 "rollback target would be {} {}",143 generation.id, generation.datetime144 );145 {146 let mut cmd = host.cmd("sh").await?;147 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));148 if let Err(e) = cmd.sudo().run().await {149 error!("failed to set rollback marker: {e}");150 failed = true;151 }152 }153 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.154 // Kicking it on manually will work best.155 //156 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will157 // only allow one instance of it.158159 // TODO: We should also watch how this process is going.160 // After running this command, we have less than 3 minutes to deploy everything,161 // if we fail to perform generation switch in time, then we will still call the activation script, and this may break something.162 // Anyway, reboot will still help in this case.163 if action.should_schedule_rollback_run() {164 let mut cmd = host.cmd("systemd-run").await?;165 cmd.comparg("--on-active", "3min")166 .comparg("--unit", "rollback-watchdog-run")167 .arg("systemctl")168 .arg("start")169 .arg("rollback-watchdog.service");170 if let Err(e) = cmd.sudo().run().await {171 error!("failed to schedule rollback run: {e}");172 failed = true;173 }174 }175 }176177 if action.should_switch_profile() && !failed {178 info!("switching generation");179 let mut cmd = 
host.cmd("nix-env").await?;180 cmd.comparg("--profile", "/nix/var/nix/profiles/system")181 .comparg("--set", &built);182 if let Err(e) = cmd.sudo().run().await {183 error!("failed to switch generation: {e}");184 failed = true;185 }186 }187188 // FIXME: Connection might be disconnected after activation run189190 if action.should_activate() && !failed {191 let _span = info_span!("activating").entered();192 info!("executing activation script");193 let mut switch_script = built.clone();194 switch_script.push("bin");195 switch_script.push("switch-to-configuration");196 let mut cmd = host.cmd(switch_script).in_current_span().await?;197 cmd.arg(action.name().expect("upload.should_activate == false"));198 if let Err(e) = cmd.sudo().run().in_current_span().await {199 error!("failed to activate: {e}");200 failed = true;201 }202 }203 if action.should_create_rollback_marker() {204 if !disable_rollback {205 if failed {206 if action.should_schedule_rollback_run() {207 info!("executing rollback");208 if let Err(e) = host209 .systemctl_start("rollback-watchdog.service")210 .instrument(info_span!("rollback"))211 .await212 {213 error!("failed to trigger rollback: {e}")214 }215 }216 } else {217 info!("trying to mark upgrade as successful");218 if let Err(e) = host219 .rm_file("/etc/fleet_rollback_marker", true)220 .in_current_span()221 .await222 {223 error!("failed to remove rollback marker. 
This is bad, as the system will be rolled back by watchdog: {e}")224 }225 }226 info!("disarming watchdog, just in case");227 if let Err(_e) = host.systemctl_stop("rollback-watchdog.timer").await {228 // It is ok, if there was no reboot - then timer might not be running.229 }230 if action.should_schedule_rollback_run() {231 if let Err(e) = host.systemctl_stop("rollback-watchdog-run.timer").await {232 error!("failed to disarm rollback run: {e}");233 }234 }235 } else if let Err(_e) = host236 .rm_file("/etc/fleet_rollback_marker", true)237 .in_current_span()238 .await239 {240 // Marker might not exist, yet better try to remove it.241 }242 }243 Ok(())244}245246async fn build_task(config: Config, host: String, build_attr: &str) -> Result<PathBuf> {247 info!("building");248 let host = config.host(&host).await?;249 // let action = Action::from(self.subcommand.clone());250 let fleet_config = &config.config_field;251 let drv = nix_go!(252 fleet_config.hosts[{ &host.name }]253 .nixosSystem254 .config255 .system256 .build[{ build_attr }]257 );258 let outputs = drv.build().await.map_err(|e| {259 if build_attr == "sdImage" {260 info!("sd-image build failed");261 info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");262 }263 e264 })?;265 let out_output = outputs266 .get("out")267 .ok_or_else(|| anyhow!("system build should produce \"out\" output"))?;268269 Ok(out_output.clone())270}271272impl BuildSystems {273 pub async fn run(self, config: &Config) -> Result<()> {274 let hosts = config.list_hosts().await?;275 let set = LocalSet::new();276 let build_attr = self.build_attr.clone();277 for host in hosts.into_iter() {278 if config.should_skip(&host.name) {279 continue;280 }281 let config = config.clone();282 let span = info_span!("build", host = field::display(&host.name));283 let hostname = host.name;284 let build_attr = build_attr.clone();285 // FIXME: Since the introduction of 
better-nix-eval,286 // due to single repl used for builds, hosts are waiting for each other to build,287 // instead of building concurrently.288 //289 // Open multiple repls?290 //291 // Create build batcher, which will behave similar to golangs292 // WaitGroup, and start executing once all the build tasks are scheduled?293 // This also allows to cleanup build output, as there will be no longer294 // "waiting for remote machine" messages in the cases when one package is needed for295 // multiple hosts.296 set.spawn_local(297 (async move {298 let built = match build_task(config, hostname.clone(), &build_attr).await {299 Ok(path) => path,300 Err(e) => {301 error!("failed to deploy host: {}", e);302 return;303 }304 };305 // TODO: Handle error306 let mut out = current_dir().expect("cwd exists");307 out.push(format!("built-{}", hostname));308309 info!("linking iso image to {:?}", out);310 if let Err(e) = symlink(built, out) {311 error!("failed to symlink: {e}")312 }313 })314 .instrument(span),315 );316 }317 set.await;318 Ok(())319 }320}321322impl Deploy {323 pub async fn run(self, config: &Config) -> Result<()> {324 let hosts = config.list_hosts().await?;325 let set = LocalSet::new();326 for host in hosts.into_iter() {327 if config.should_skip(&host.name) {328 continue;329 }330 let config = config.clone();331 let span = info_span!("deploy", host = field::display(&host.name));332 let hostname = host.name.clone();333 // FIXME: Fix repl concurrency (see build-systems)334 set.spawn_local(335 (async move {336 let built = match build_task(config.clone(), hostname.clone(), "toplevel").await337 {338 Ok(path) => path,339 Err(e) => {340 error!("failed to deploy host: {}", e);341 return;342 }343 };344 if !config.is_local(&hostname) {345 info!("uploading system closure");346 {347 // TODO: Move to remote_derivation method.348 // Alternatively, nix store make-content-addressed can be used,349 // at least for the first deployment, to provide trusted store key.350 //351 // It is much 
slower, yet doesn't require root on the deployer machine.352 let mut sign = MyCommand::new("nix");353 // Private key for host machine is registered in nix-sign.nix354 sign.arg("store")355 .arg("sign")356 .comparg("--key-file", "/etc/nix/private-key")357 .arg("-r")358 .arg(&built);359 if let Err(e) = sign.sudo().run_nix().await {360 warn!("Failed to sign store paths: {e}");361 };362 }363 let mut tries = 0;364 loop {365 match host.remote_derivation(&built).await {366 Ok(remote) => {367 assert!(remote == built, "CA derivations aren't implemented");368 break;369 }370 Err(e) if tries < 3 => {371 tries += 1;372 warn!("copy failure ({}/3): {}", tries, e);373 sleep(Duration::from_millis(5000)).await;374 }375 Err(e) => {376 error!("upload failed: {e}");377 return;378 }379 }380 }381 }382 if let Err(e) =383 deploy_task(self.action, &host, built, self.disable_rollback).await384 {385 error!("activation failed: {e}");386 }387 })388 .instrument(span),389 );390 }391 set.await;392 Ok(())393 }394}1use std::{env::current_dir, os::unix::fs::symlink, path::PathBuf, time::Duration};23use anyhow::{anyhow, Result};4use clap::{Parser, ValueEnum};5use itertools::Itertools as _;6use nix_eval::nix_go;7use tokio::{task::LocalSet, time::sleep};8use tracing::{error, field, info, info_span, warn, Instrument};910use crate::{11 command::MyCommand,12 host::{Config, ConfigHost},13};1415#[derive(Parser)]16pub struct Deploy {17 /// Disable automatic rollback18 #[clap(long)]19 disable_rollback: bool,20 /// Action to execute after system is built21 action: DeployAction,22}2324#[derive(ValueEnum, Clone, Copy)]25enum DeployAction {26 /// Upload derivation, but do not execute the update.27 Upload,28 /// Upload and execute the activation script, old version will be used after reboot.29 Test,30 /// Upload and set as current system profile, but do not execute activation script.31 Boot,32 /// Upload, set current profile, and execute activation script.33 Switch,34}3536impl DeployAction {37 pub(crate) fn 
name(&self) -> Option<&'static str> {38 match self {39 DeployAction::Upload => None,40 DeployAction::Test => Some("test"),41 DeployAction::Boot => Some("boot"),42 DeployAction::Switch => Some("switch"),43 }44 }45 pub(crate) fn should_switch_profile(&self) -> bool {46 matches!(self, Self::Switch | Self::Boot)47 }48 pub(crate) fn should_activate(&self) -> bool {49 matches!(self, Self::Switch | Self::Test)50 }51 pub(crate) fn should_create_rollback_marker(&self) -> bool {52 // Upload does nothing on the target machine, other than uploading the closure.53 // In boot case we want to have rollback marker prepared, so that the system may rollback itself on the next boot.54 !matches!(self, Self::Upload)55 }56 pub(crate) fn should_schedule_rollback_run(&self) -> bool {57 matches!(self, Self::Switch | Self::Test)58 }59}6061#[derive(Parser, Clone)]62pub struct BuildSystems {63 /// Attribute to build. Systems are deployed from "toplevel" attr, well-known used attributes64 /// are "sdImage"/"isoImage", and your configuration may include any other build attributes.65 #[clap(long, default_value = "toplevel")]66 build_attr: String,67}6869struct Generation {70 id: u32,71 current: bool,72 datetime: String,73}74async fn get_current_generation(host: &ConfigHost) -> Result<Generation> {75 let mut cmd = host.cmd("nix-env").await?;76 cmd.comparg("--profile", "/nix/var/nix/profiles/system")77 .arg("--list-generations");78 // Sudo is required due to --list-generations acquiring lock on the profile.79 let data = cmd.sudo().run_string().await?;80 let generations = data81 .split('\n')82 .map(|e| e.trim())83 .filter(|&l| !l.is_empty())84 .filter_map(|g| {85 let gen: Option<Generation> = try {86 let mut parts = g.split_whitespace();87 let id = parts.next()?;88 let id: u32 = id.parse().ok()?;89 let date = parts.next()?;90 let time = parts.next()?;91 let current = if let Some(current) = parts.next() {92 if current == "(current)" {93 Some(true)94 } else {95 None96 }97 } else {98 Some(false)99 
};100 let current = current?;101 if parts.next().is_some() {102 warn!("unexpected text after generation: {g}");103 }104 Generation {105 id,106 current,107 datetime: format!("{date} {time}"),108 }109 };110 if gen.is_none() {111 warn!("bad generation: {g}")112 }113 gen114 })115 .collect::<Vec<_>>();116 let current = generations117 .into_iter()118 .filter(|g| g.current)119 .at_most_one()120 .map_err(|_e| anyhow!("bad list-generations output"))?121 .ok_or_else(|| anyhow!("failed to find generation"))?;122 Ok(current)123}124125async fn deploy_task(126 action: DeployAction,127 host: &ConfigHost,128 built: PathBuf,129 specialisation: Option<String>,130 disable_rollback: bool,131) -> Result<()> {132 let mut failed = false;133 // TODO: Lockfile, to prevent concurrent system switch?134 // TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback135 // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to136 // unit name conflict in systemd-run137 // This code is tied to rollback.nix138 if !disable_rollback && action.should_create_rollback_marker() {139 let _span = info_span!("preparing").entered();140 info!("preparing for rollback");141 let generation = get_current_generation(host).await?;142 info!(143 "rollback target would be {} {}",144 generation.id, generation.datetime145 );146 {147 let mut cmd = host.cmd("sh").await?;148 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));149 if let Err(e) = cmd.sudo().run().await {150 error!("failed to set rollback marker: {e}");151 failed = true;152 }153 }154 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.155 // Kicking it on manually will work best.156 //157 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will158 // 
only allow one instance of it.159160 // TODO: We should also watch how this process is going.161 // After running this command, we have less than 3 minutes to deploy everything,162 // if we fail to perform generation switch in time, then we will still call the activation script, and this may break something.163 // Anyway, reboot will still help in this case.164 if action.should_schedule_rollback_run() {165 let mut cmd = host.cmd("systemd-run").await?;166 cmd.comparg("--on-active", "3min")167 .comparg("--unit", "rollback-watchdog-run")168 .arg("systemctl")169 .arg("start")170 .arg("rollback-watchdog.service");171 if let Err(e) = cmd.sudo().run().await {172 error!("failed to schedule rollback run: {e}");173 failed = true;174 }175 }176 }177178 if action.should_switch_profile() && !failed {179 info!("switching generation");180 let mut cmd = host.cmd("nix-env").await?;181 cmd.comparg("--profile", "/nix/var/nix/profiles/system")182 .comparg("--set", &built);183 if let Err(e) = cmd.sudo().run().await {184 error!("failed to switch generation: {e}");185 failed = true;186 }187 }188189 // FIXME: Connection might be disconnected after activation run190191 if action.should_activate() && !failed {192 let _span = info_span!("activating").entered();193 info!("executing activation script");194 let specialised = if let Some(specialisation) = specialisation {195 let mut specialised = built.join("specialisation");196 specialised.push(specialisation);197 specialised198 } else {199 built.clone()200 };201 let switch_script = specialised.join("bin/switch-to-configuration");202 let mut cmd = host.cmd(switch_script).in_current_span().await?;203 cmd.arg(action.name().expect("upload.should_activate == false"));204 if let Err(e) = cmd.sudo().run().in_current_span().await {205 error!("failed to activate: {e}");206 failed = true;207 }208 }209 if action.should_create_rollback_marker() {210 if !disable_rollback {211 if failed {212 if action.should_schedule_rollback_run() {213 info!("executing 
rollback");214 if let Err(e) = host215 .systemctl_start("rollback-watchdog.service")216 .instrument(info_span!("rollback"))217 .await218 {219 error!("failed to trigger rollback: {e}")220 }221 }222 } else {223 info!("trying to mark upgrade as successful");224 if let Err(e) = host225 .rm_file("/etc/fleet_rollback_marker", true)226 .in_current_span()227 .await228 {229 error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")230 }231 }232 info!("disarming watchdog, just in case");233 if let Err(_e) = host.systemctl_stop("rollback-watchdog.timer").await {234 // It is ok, if there was no reboot - then timer might not be running.235 }236 if action.should_schedule_rollback_run() {237 if let Err(e) = host.systemctl_stop("rollback-watchdog-run.timer").await {238 error!("failed to disarm rollback run: {e}");239 }240 }241 } else if let Err(_e) = host242 .rm_file("/etc/fleet_rollback_marker", true)243 .in_current_span()244 .await245 {246 // Marker might not exist, yet better try to remove it.247 }248 }249 Ok(())250}251252async fn build_task(config: Config, host: String, build_attr: &str) -> Result<PathBuf> {253 info!("building");254 let host = config.host(&host).await?;255 // let action = Action::from(self.subcommand.clone());256 let fleet_config = &config.config_field;257 let drv = nix_go!(258 fleet_config.hosts[{ &host.name }]259 .nixosSystem260 .config261 .system262 .build[{ build_attr }]263 );264 let outputs = drv.build().await.inspect_err(|_| {265 if build_attr == "sdImage" {266 info!("sd-image build failed");267 info!("Make sure you have imported modulesPath/installer/sd-card/sd-image-<arch>[-installer].nix (For installer, you may want to check config)");268 }269 })?;270 let out_output = outputs271 .get("out")272 .ok_or_else(|| anyhow!("system build should produce \"out\" output"))?;273274 Ok(out_output.clone())275}276277impl BuildSystems {278 pub async fn run(self, config: &Config) -> Result<()> {279 let hosts = 
config.list_hosts().await?;280 let set = LocalSet::new();281 let build_attr = self.build_attr.clone();282 for host in hosts.into_iter() {283 if config.should_skip(&host).await? {284 continue;285 }286 let config = config.clone();287 let span = info_span!("build", host = field::display(&host.name));288 let hostname = host.name;289 let build_attr = build_attr.clone();290 // FIXME: Since the introduction of better-nix-eval,291 // due to single repl used for builds, hosts are waiting for each other to build,292 // instead of building concurrently.293 //294 // Open multiple repls?295 //296 // Create build batcher, which will behave similar to golangs297 // WaitGroup, and start executing once all the build tasks are scheduled?298 // This also allows to cleanup build output, as there will be no longer299 // "waiting for remote machine" messages in the cases when one package is needed for300 // multiple hosts.301 set.spawn_local(302 (async move {303 let built = match build_task(config, hostname.clone(), &build_attr).await {304 Ok(path) => path,305 Err(e) => {306 error!("failed to deploy host: {}", e);307 return;308 }309 };310 // TODO: Handle error311 let mut out = current_dir().expect("cwd exists");312 out.push(format!("built-{}", hostname));313314 info!("linking iso image to {:?}", out);315 if let Err(e) = symlink(built, out) {316 error!("failed to symlink: {e}")317 }318 })319 .instrument(span),320 );321 }322 set.await;323 Ok(())324 }325}326327impl Deploy {328 pub async fn run(self, config: &Config) -> Result<()> {329 let hosts = config.list_hosts().await?;330 let set = LocalSet::new();331 for host in hosts.into_iter() {332 if config.should_skip(&host).await? 
{333 continue;334 }335 let config = config.clone();336 let span = info_span!("deploy", host = field::display(&host.name));337 let hostname = host.name.clone();338 // FIXME: Fix repl concurrency (see build-systems)339 set.spawn_local(340 (async move {341 let built = match build_task(config.clone(), hostname.clone(), "toplevel").await342 {343 Ok(path) => path,344 Err(e) => {345 error!("failed to deploy host: {}", e);346 return;347 }348 };349 if !config.is_local(&hostname) {350 info!("uploading system closure");351 {352 // TODO: Move to remote_derivation method.353 // Alternatively, nix store make-content-addressed can be used,354 // at least for the first deployment, to provide trusted store key.355 //356 // It is much slower, yet doesn't require root on the deployer machine.357 let mut sign = MyCommand::new("nix");358 // Private key for host machine is registered in nix-sign.nix359 sign.arg("store")360 .arg("sign")361 .comparg("--key-file", "/etc/nix/private-key")362 .arg("-r")363 .arg(&built);364 if let Err(e) = sign.sudo().run_nix().await {365 warn!("Failed to sign store paths: {e}");366 };367 }368 let mut tries = 0;369 loop {370 match host.remote_derivation(&built).await {371 Ok(remote) => {372 assert!(remote == built, "CA derivations aren't implemented");373 break;374 }375 Err(e) if tries < 3 => {376 tries += 1;377 warn!("copy failure ({}/3): {}", tries, e);378 sleep(Duration::from_millis(5000)).await;379 }380 Err(e) => {381 error!("upload failed: {e}");382 return;383 }384 }385 }386 }387 if let Err(e) = deploy_task(388 self.action,389 &host,390 built,391 if let Ok(v) = config.action_attr(&host, "specialisation").await {392 v393 } else {394 error!("unreachable? failed to get specialization");395 return;396 },397 self.disable_rollback,398 )399 .await400 {401 error!("activation failed: {e}");402 }403 })404 .instrument(span),405 );406 }407 set.await;408 Ok(())409 }410}cmds/fleet/src/cmds/secrets/mod.rsdiffbeforeafterboth--- a/cmds/fleet/src/cmds/secrets/mod.rs
+++ b/cmds/fleet/src/cmds/secrets/mod.rs
@@ -436,7 +436,7 @@
match self {
Secret::ForceKeys => {
for host in config.list_hosts().await? {
- if config.should_skip(&host.name) {
+ if config.should_skip(&host).await? {
continue;
}
config.key(&host.name).await?;
@@ -639,7 +639,7 @@
}
}
for host in config.list_hosts().await? {
- if config.should_skip(&host.name) {
+ if config.should_skip(&host).await? {
continue;
}
cmds/fleet/src/host.rsdiffbeforeafterboth--- a/cmds/fleet/src/host.rs
+++ b/cmds/fleet/src/host.rs
@@ -1,4 +1,6 @@
use std::{
+ cell::OnceCell,
+ collections::BTreeMap,
env::current_dir,
ffi::{OsStr, OsString},
fmt::Display,
@@ -10,9 +12,16 @@
};
use anyhow::{anyhow, bail, ensure, Context, Result};
-use clap::{ArgGroup, Parser};
+use clap::Parser;
use fleet_shared::SecretData;
use nix_eval::{nix_go, nix_go_json, NixSessionPool, Value};
+use nom::{
+ bytes::complete::take_while1,
+ character::complete::char,
+ combinator::{map, opt},
+ multi::separated_list1,
+ sequence::{preceded, separated_pair},
+};
use openssh::SessionBuilder;
use serde::de::DeserializeOwned;
use tempfile::NamedTempFile;
@@ -53,10 +62,26 @@
pub name: String,
pub local: bool,
pub session: OnceLock<Arc<openssh::Session>>,
+ groups: OnceCell<Vec<String>>,
pub nixos_config: Option<Value>,
}
impl ConfigHost {
+ /// Returns the tags of this host, read from the `tags` attribute of its `nixos_config`.
+ ///
+ /// The evaluated list is stored in `self.groups`, so later calls return a copy of the
+ /// stored value instead of evaluating again. A host without `nixos_config` yields an
+ /// empty list, and nothing is stored by that call.
+ pub async fn tags(&self) -> Result<Vec<String>> {
+     match (self.groups.get(), &self.nixos_config) {
+         // Already populated: hand out a copy of the stored list.
+         (Some(cached), _) => Ok(cached.clone()),
+         // No configuration to evaluate.
+         (None, None) => Ok(Vec::new()),
+         (None, Some(nixos_config)) => {
+             // The configuration could change between the check above and this
+             // evaluation (TOCTOU). That case is not handled anywhere else either;
+             // tags are assumed to always evaluate to the same value.
+             let evaluated: Vec<String> = nix_go_json!(nixos_config.tags);
+
+             // A failed `set` only means the cell was filled in the meantime.
+             let _ = self.groups.set(evaluated.clone());
+
+             Ok(evaluated)
+         }
+     }
+ }
async fn open_session(&self) -> Result<Arc<openssh::Session>> {
assert!(!self.local, "do not open ssh connection to local session");
// FIXME: TOCTOU
@@ -217,15 +242,71 @@
}
impl Config {
- pub fn should_skip(&self, host: &str) -> bool {
- if !self.opts.skip.is_empty() {
- self.opts.skip.iter().any(|h| h as &str == host)
- } else if !self.opts.only.is_empty() {
- !self.opts.only.iter().any(|h| h as &str == host)
- } else {
- false
+ /// Decides whether `host` is excluded by the `--skip` / `--only` options.
+ ///
+ /// A host listed in `--skip` is always skipped. Otherwise nothing is skipped while
+ /// `--only` is empty; when it is not, the host is kept only if an entry names it
+ /// directly or names one of its tags. Host tags are evaluated only when `--only`
+ /// contains a tag entry and the host was not named directly.
+ pub async fn should_skip(&self, host: &ConfigHost) -> Result<bool> {
+     let opts = &self.opts;
+     // An explicit skip entry always wins.
+     if opts.skip.iter().any(|skipped| skipped as &str == host.name) {
+         return Ok(true);
+     }
+     // Without `--only`, every remaining host is processed.
+     if opts.only.is_empty() {
+         return Ok(false);
+     }
+     let named_directly = opts
+         .only
+         .iter()
+         .any(|item| matches!(item, HostItem::Host { name, .. } if *name == host.name));
+     if named_directly {
+         return Ok(false);
}
+     let has_tag_entries = opts
+         .only
+         .iter()
+         .any(|item| matches!(item, HostItem::Tag { .. }));
+     if !has_tag_entries {
+         // Only host entries were given and none of them names this host.
+         return Ok(true);
+     }
+     let host_tags = host.tags().await?;
+     let selected_by_tag = opts
+         .only
+         .iter()
+         .any(|item| matches!(item, HostItem::Tag { name, .. } if host_tags.contains(name)));
+     Ok(!selected_by_tag)
}
+ /// Looks up `attr` among the `--only` entries that apply to `host`.
+ ///
+ /// Entries naming the host directly take precedence over tag entries. Within each
+ /// kind, the first entry (in `--only` order) that carries `attr` wins. Host tags are
+ /// evaluated only when some tag entry carries `attr`.
+ ///
+ /// Returns `Ok(None)` when no applicable entry defines `attr`, which includes the
+ /// case of an empty `--only`.
+ pub async fn action_attr(&self, host: &ConfigHost, attr: &str) -> Result<Option<String>> {
+     let entries = &self.opts.only;
+
+     // First pass: entries that name this host directly.
+     let direct = entries.iter().find_map(|item| match item {
+         HostItem::Host { name, attrs } if *name == host.name => attrs.get(attr),
+         _ => None,
+     });
+     if let Some(value) = direct {
+         return Ok(Some(value.clone()));
+     }
+
+     // Avoid evaluating host tags unless a tag entry could actually provide the value.
+     let some_tag_has_attr = entries
+         .iter()
+         .any(|item| matches!(item, HostItem::Tag { attrs, .. } if attrs.contains_key(attr)));
+     if !some_tag_has_attr {
+         return Ok(None);
+     }
+
+     // Second pass: tag entries matching one of the host's tags.
+     let host_tags = host.tags().await?;
+     let via_tag = entries.iter().find_map(|item| match item {
+         HostItem::Tag { name, attrs } if host_tags.contains(name) => attrs.get(attr),
+         _ => None,
+     });
+     Ok(via_tag.cloned())
+ }
pub fn is_local(&self, host: &str) -> bool {
self.opts.localhost.as_ref().map(|s| s as &str) == Some(host)
}
@@ -237,6 +318,11 @@
local: true,
session: OnceLock::new(),
nixos_config: None,
+ groups: {
+ let cell = OnceCell::new();
+ let _ = cell.set(vec![]);
+ cell
+ },
}
}
@@ -249,6 +335,7 @@
local: self.is_local(name),
session: OnceLock::new(),
nixos_config: Some(nixos_config),
+ groups: OnceCell::new(),
})
}
pub async fn list_hosts(&self) -> Result<Vec<ConfigHost>> {
@@ -356,15 +443,59 @@
}
}
+/// One parsed `--only` entry: either a single host or a tag, each with optional
+/// `key=value` attributes.
+///
+/// `Debug` is derived so that parsed entries can be printed in logs and assertion messages.
+#[derive(Clone, Debug)]
+enum HostItem {
+    /// Entry written without a leading `@`; selects the host with this exact name.
+    Host {
+        /// Host name, compared against `ConfigHost::name`.
+        name: String,
+        /// Attributes given after `?` as `key=value` pairs separated by `&`.
+        attrs: BTreeMap<String, String>,
+    },
+    /// Entry written with a leading `@`; selects hosts whose tags contain this name.
+    Tag {
+        /// Tag name, compared against the host's tags.
+        name: String,
+        /// Attributes given after `?` as `key=value` pairs separated by `&`.
+        attrs: BTreeMap<String, String>,
+    },
+}
+/// Parses one `--only` entry into a [`HostItem`].
+///
+/// Accepted shape, as implemented below:
+/// - an optional leading `@`, which makes the entry a tag instead of a host;
+/// - a non-empty name, which ends at the first `,`, `?` or `@`;
+/// - optionally `?` followed by one or more `key=value` pairs separated by `&`
+///   (keys and values must be non-empty; a repeated key keeps its last value).
+///
+/// The whole input must be consumed. On failure a message string is returned, which is
+/// the error type used by the clap `value_parser` this function is registered as.
+fn host_item_parser(input: &str) -> Result<HostItem, String> {
+    // Pins nom's error type for every combinator below and renders it as text.
+    fn err_to_string(err: nom::Err<nom::error::Error<&str>>) -> String {
+        err.to_string()
+    }
+
+    let (input, tag_marker) = opt(char('@'))(input).map_err(err_to_string)?;
+    let (input, name) = take_while1(|c: char| c != ',' && c != '?' && c != '@')(input)
+        .map_err(err_to_string)?;
+
+    // key=value, converted to owned strings as soon as the pair is recognised.
+    let kw_item = map(
+        separated_pair(
+            take_while1(|c: char| c != '&' && c != '='),
+            char('='),
+            take_while1(|c: char| c != '&'),
+        ),
+        |(key, value): (&str, &str)| (key.to_owned(), value.to_owned()),
+    );
+    let mut opt_kw = opt(preceded(char('?'), separated_list1(char('&'), kw_item)));
+
+    let (input, pairs) = opt_kw(input).map_err(err_to_string)?;
+    let attrs: BTreeMap<String, String> = pairs.unwrap_or_default().into_iter().collect();
+
+    if !input.is_empty() {
+        return Err(format!("unexpected trailing input: {input:?}"));
+    }
+
+    let name = name.to_owned();
+    Ok(match tag_marker {
+        Some(_) => HostItem::Tag { name, attrs },
+        None => HostItem::Host { name, attrs },
+    })
+}
+
#[derive(Parser, Clone)]
-#[clap(group = ArgGroup::new("target_hosts"))]
pub struct FleetOpts {
/// All hosts except those would be skipped
- #[clap(long, number_of_values = 1, group = "target_hosts")]
- only: Vec<String>,
+ #[clap(long, number_of_values = 1, value_parser = host_item_parser)]
+ only: Vec<HostItem>,
/// Hosts to skip
- #[clap(long, number_of_values = 1, group = "target_hosts")]
+ #[clap(long, number_of_values = 1)]
skip: Vec<String>,
/// Host which should be treated as the current machine
flake.lockdiffbeforeafterboth--- a/flake.lock
+++ b/flake.lock
@@ -7,11 +7,11 @@
]
},
"locked": {
- "lastModified": 1720226507,
- "narHash": "sha256-yHVvNsgrpyNTXZBEokL8uyB2J6gB1wEx0KOJzoeZi1A=",
+ "lastModified": 1721699339,
+ "narHash": "sha256-UqtSwU13vpzzM6w8tGghEbA7ObM3NCDzSpz19QQo9XE=",
"owner": "ipetkov",
"repo": "crane",
- "rev": "0aed560c5c0a61c9385bddff471a13036203e11c",
+ "rev": "0081e9c447f3b70822c142908f08ceeb436982b8",
"type": "github"
},
"original": {
@@ -40,11 +40,11 @@
},
"nixpkgs": {
"locked": {
- "lastModified": 1720525988,
- "narHash": "sha256-6Vvrwl2rKrRt5gAYTFlM/pihCwHw8SY2o81TBm7KhIQ=",
+ "lastModified": 1721814637,
+ "narHash": "sha256-L3QkCvxeByJfW45wLkdZ9pL5h9PezOwwfx7G2sRfjiU=",
"owner": "nixos",
"repo": "nixpkgs",
- "rev": "a630e7a8476e51b116f1ca7444dbad20701823d7",
+ "rev": "e0c444a0b8413a31df199052f5714d409dc4c1d0",
"type": "github"
},
"original": {
@@ -68,11 +68,11 @@
},
"nixpkgs-stable-for-tests": {
"locked": {
- "lastModified": 1720386169,
- "narHash": "sha256-NGKVY4PjzwAa4upkGtAMz1npHGoRzWotlSnVlqI40mo=",
+ "lastModified": 1721548954,
+ "narHash": "sha256-7cCC8+Tdq1+3OPyc3+gVo9dzUNkNIQfwSDJ2HSi2u3o=",
"owner": "nixos",
"repo": "nixpkgs",
- "rev": "194846768975b7ad2c4988bdb82572c00222c0d7",
+ "rev": "63d37ccd2d178d54e7fb691d7ec76000740ea24a",
"type": "github"
},
"original": {
@@ -98,11 +98,11 @@
]
},
"locked": {
- "lastModified": 1720491570,
- "narHash": "sha256-PHS2BcQ9kxBpu9GKlDg3uAlrX/ahQOoAiVmwGl6BjD4=",
+ "lastModified": 1721810656,
+ "narHash": "sha256-33UCMmgPL+sz06+iupNkl99hcBABP56ENcxSoKqr0TY=",
"owner": "oxalica",
"repo": "rust-overlay",
- "rev": "b970af40fdc4bd80fd764796c5f97c15e2b564eb",
+ "rev": "a6afdaab4a47d6ecf647a74968e92a51c4a18e5a",
"type": "github"
},
"original": {