git.delta.rocks / jrsonnet / refs/commits / 7a60d07bbf3e

difftreelog

refactor split deployment function

Yaroslav Bolyukin2023-10-22parent: #e9e8e99.patch.diff
in: trunk

1 file changed

modifiedcmds/fleet/src/cmds/build_systems.rsdiffbeforeafterboth
1use std::path::PathBuf;
1use std::{env::current_dir, time::Duration};2use std::{env::current_dir, time::Duration};
23
3use crate::command::MyCommand;4use crate::command::MyCommand;
166 Ok(current)167 Ok(current)
167}168}
169
170async fn systemctl_stop(config: &Config, host: &str, unit: &str) -> Result<()> {
171 let mut cmd = MyCommand::new("systemctl");
172 cmd.arg("stop").arg(unit);
173 config.run_on(&host, cmd, true).await
174}
175
176async fn systemctl_start(config: &Config, host: &str, unit: &str) -> Result<()> {
177 let mut cmd = MyCommand::new("systemctl");
178 cmd.arg("start").arg(unit);
179 config.run_on(&host, cmd, true).await
180}
181
182async fn execute_upload(
183 build: &BuildSystems,
184 config: &Config,
185 action: UploadAction,
186 host: &str,
187 built: PathBuf,
188) -> Result<()> {
189 let mut failed = false;
190 // TODO: Lockfile, to prevent concurrent system switch?
191 // TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback
192 // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to
193 // unit name conflict in systemd-run
194 if !build.disable_rollback {
195 let _span = info_span!("preparing").entered();
196 info!("preparing for rollback");
197 let generation = get_current_generation(&config, &host).await?;
198 info!(
199 "rollback target would be {} {}",
200 generation.id, generation.datetime
201 );
202 {
203 let mut cmd = MyCommand::new("sh");
204 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));
205 if let Err(e) = config.run_on(&host, cmd, true).await {
206 error!("failed to set rollback marker: {e}");
207 failed = true;
208 }
209 }
210 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.
211 // Kicking it on manually will work best.
212 //
213 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will
214 // only allow one instance of it.
215
216 // TODO: We should also watch how this process is going.
217 // After running this command, we have less than 3 minutes to deploy everything,
218 // if we fail to perform generation switch in time, then we will still call the activation script, and this may break something.
219 // Anyway, reboot will still help in this case.
220 if action.should_schedule_rollback_run() {
221 let mut cmd = MyCommand::new("systemd-run");
222 cmd.comparg("--on-active", "3min")
223 .comparg("--unit", "rollback-watchdog-run")
224 .arg("systemctl")
225 .arg("start")
226 .arg("rollback-watchdog.service");
227 if let Err(e) = config.run_on(&host, cmd, true).await {
228 error!("failed to schedule rollback run: {e}");
229 failed = true;
230 }
231 }
232 }
233 if action.should_switch_profile() && !failed {
234 info!("switching generation");
235 let mut cmd = MyCommand::new("nix-env");
236 cmd.comparg("--profile", "/nix/var/nix/profiles/system")
237 .comparg("--set", &built);
238 if let Err(e) = config.run_on(&host, cmd, true).await {
239 error!("failed to switch generation: {e}");
240 failed = true;
241 }
242 }
243 if action.should_activate() && !failed {
244 let _span = info_span!("activating").entered();
245 info!("executing activation script");
246 let mut switch_script = built.clone();
247 switch_script.push("bin");
248 switch_script.push("switch-to-configuration");
249 let mut cmd = MyCommand::new(switch_script);
250 cmd.arg(action.name());
251 if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {
252 error!("failed to activate: {e}");
253 failed = true;
254 }
255 }
256 if !build.disable_rollback {
257 if failed {
258 info!("executing rollback");
259 if let Err(e) = systemctl_start(&config, &host, "rollback-watchdog.service")
260 .instrument(info_span!("rollback"))
261 .await
262 {
263 error!("failed to trigger rollback: {e}")
264 }
265 } else {
266 info!("trying to mark upgrade as successful");
267 let mut cmd = MyCommand::new("rm");
268 cmd.arg("-f").arg("/etc/fleet_rollback_marker");
269 if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {
270 error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")
271 }
272 }
273 info!("disarming watchdog, just in case");
274 if let Err(_e) = systemctl_stop(&config, &host, "rollback-watchdog.timer").await {
275 // It is ok, if there was no reboot - then timer might not be running.
276 }
277 if action.should_schedule_rollback_run() {
278 if let Err(e) = systemctl_stop(&config, &host, "rollback-watchdog-run.timer").await {
279 error!("failed to disarm rollback run: {e}");
280 }
281 }
282 } else {
283 let mut cmd = MyCommand::new("rm");
284 cmd.arg("-f").arg("/etc/fleet_rollback_marker");
285 if let Err(_e) = config.run_on(&host, cmd, true).in_current_span().await {
286 // Marker might not exist, yet better try to remove it.
287 }
288 }
289 Ok(())
290}
168291
169impl BuildSystems {292impl BuildSystems {
170 async fn build_task(self, config: Config, host: String) -> Result<()> {293 async fn build_task(self, config: Config, host: String) -> Result<()> {
183 "--json",306 "--json",
184 // "--show-trace",307 // "--show-trace",
185 "--no-link",308 "--no-link",
186 "--option",
187 "log-lines",
188 "200",
189 ])309 ])
190 .comparg("--out-link", &built)310 .comparg("--out-link", &built)
191 .arg(311 .arg(
219 let mut nix = MyCommand::new("nix");339 let mut nix = MyCommand::new("nix");
220 nix.arg("copy")340 nix.arg("copy")
221 .arg("--substitute-on-destination")341 .arg("--substitute-on-destination")
222 .comparg("--to", format!("ssh://root@{host}"))342 .comparg("--to", format!("ssh-ng://root@{host}"))
223 .arg(&built);343 .arg(&built);
224 match nix.run_nix().await {344 match nix.run_nix().await {
225 Ok(()) => break,345 Ok(()) => break,
233 }353 }
234 }354 }
235 if let Some(action) = action {355 if let Some(action) = action {
236 let mut failed = false;
237 // TODO: Lockfile, to prevent concurrent system switch?
238 // TODO: If rollback target exists - bail, it should be removed. Lockfile will not work in case if rollback
239 // is scheduler on next boot (default behavior). On current boot - rollback activator will fail due to
240 // unit name conflict in systemd-run
241 if !self.disable_rollback {
242 let _span = info_span!("preparing").entered();
243 info!("preparing for rollback");
244 let generation = get_current_generation(&config, &host).await?;
245 info!(
246 "rollback target would be {} {}",
247 generation.id, generation.datetime
248 );
249 {
250 let mut cmd = MyCommand::new("sh");
251 cmd.arg("-c").arg(format!("mark=$(mktemp -p /etc -t fleet_rollback_marker.XXXXX) && echo -n {} > $mark && mv --no-clobber $mark /etc/fleet_rollback_marker", generation.id));
252 if let Err(e) = config.run_on(&host, cmd, true).await {
253 error!("failed to set rollback marker: {e}");
254 failed = true;
255 }
256 }
257 // Activation script also starts rollback-watchdog.timer, however, it is possible that it won't be started.
258 // Kicking it on manually will work best.
259 //
260 // There wouldn't be conflict, because here we trigger start of the primary service, and systemd will
261 // only allow one instance of it.
262 if action.should_schedule_rollback_run() {
263 let mut cmd = MyCommand::new("systemd-run");
264 cmd.comparg("--on-active", "3min")
265 .comparg("--unit", "rollback-watchdog-run")
266 .arg("systemctl")
267 .arg("start")
268 .arg("rollback-watchdog.service");
269 if let Err(e) = config.run_on(&host, cmd, true).await {
270 error!("failed to schedule rollback run: {e}");
271 failed = true;
272 }
273 }
274 }
275 if action.should_switch_profile() && !failed {
276 info!("switching generation");
277 let mut cmd = MyCommand::new("nix-env");
278 cmd.comparg("--profile", "/nix/var/nix/profiles/system")
279 .comparg("--set", &built);
280 if let Err(e) = config.run_on(&host, cmd, true).await {
281 error!("failed to switch generation: {e}");
282 failed = true;
283 }
284 }
285 if action.should_activate() && !failed {
286 let _span = info_span!("activating").entered();
287 info!("executing activation script");
288 let mut switch_script = built.clone();
289 switch_script.push("bin");
290 switch_script.push("switch-to-configuration");
291 let mut cmd = MyCommand::new(switch_script);
292 cmd.arg(action.name());
293 if let Err(e) = config.run_on(&host, cmd, true).in_current_span().await {
294 error!("failed to activate: {e}");
295 failed = true;
296 }
297 }
298 if !self.disable_rollback {
299 {
300 let _span = info_span!("rollback").entered();
301 if failed {
302 info!("executing rollback");
303 let mut cmd = MyCommand::new("systemctl");
304 cmd.arg("start").arg("rollback-watchdog.service");
305 if let Err(e) = config.run_on(&host, cmd, true).await {356 execute_upload(&self, &config, action, &host, built).await?
306 error!("failed to rollback: {e}");
307 }
308 } else {
309 info!("marking upgrade as successful");
310 let mut cmd = MyCommand::new("rm");
311 cmd.arg("-f").arg("/etc/fleet_rollback_marker");
312 if let Err(e) =
313 config.run_on(&host, cmd, true).in_current_span().await
314 {
315 error!("failed to remove rollback marker. This is bad, as the system will be rolled back by watchdog: {e}")
316 }
317 }
318 }
319 {
320 let _span = info_span!("disarm").entered();
321 info!("disarming watchdog, just in case");
322 {
323 let mut cmd = MyCommand::new("systemctl");
324 cmd.arg("stop").arg("rollback-watchdog.timer");
325 if let Err(_e) = config.run_on(&host, cmd, true).await {
326 // It is ok, if there was no reboot.
327 }
328 }
329 if action.should_schedule_rollback_run() {
330 let mut cmd = MyCommand::new("systemctl");
331 cmd.arg("stop").arg("rollback-watchdog-run.timer");
332 if let Err(e) = config.run_on(&host, cmd, true).await {
333 error!("failed to disarm rollback run: {e}");
334 }
335 }
336 }
337 }
338 }357 }
339 }358 }
340 Action::Package(PackageAction::SdImage) => {359 Action::Package(PackageAction::SdImage) => {