diff --git a/Cargo.lock b/Cargo.lock
index 23f50ee..da111c2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3031,6 +3031,7 @@ dependencies = [
  "actix-web",
  "actix-web-httpauth",
  "base64 0.13.0",
+ "chrono",
  "clap",
  "http",
  "http-serde",
diff --git a/Cargo.toml b/Cargo.toml
index 1db3039..7bdd4ec 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,6 +44,7 @@ run_script = "0.9"
 lettre = { version = "0.9", features = ["smtp-transport"], optional = true }
 lettre_email = { version = "0.9", optional = true }
 libstrophe = { version = "0.16", default-features = false, optional = true }
+chrono = "0.4"
 
 [features]
 default = ["notifier-email", "notifier-twilio", "notifier-slack", "notifier-zulip", "notifier-telegram", "notifier-pushover", "notifier-gotify", "notifier-matrix", "notifier-webhook"]
diff --git a/res/assets/images/badges/color-maintenance-default.svg b/res/assets/images/badges/color-maintenance-default.svg
new file mode 100644
index 0000000..888a9e4
--- /dev/null
+++ b/res/assets/images/badges/color-maintenance-default.svg
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/res/assets/images/badges/icon-maintenance-default.svg b/res/assets/images/badges/icon-maintenance-default.svg
new file mode 100644
index 0000000..0678b87
--- /dev/null
+++ b/res/assets/images/badges/icon-maintenance-default.svg
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
diff --git a/res/assets/stylesheets/common.css b/res/assets/stylesheets/common.css
index 4a66ef1..1cf1a9f 100644
--- a/res/assets/stylesheets/common.css
+++ b/res/assets/stylesheets/common.css
@@ -243,6 +243,27 @@ a:hover {
   border-color: rgba(241, 128, 0, 0.17);
 }
 
+.badge-status-maintenance.badge-default {
+  background-image: url("/assets/images/badges/icon-maintenance-default.svg");
+}
+
+.status-maintenance-background,
+.status-maintenance-background-before:before {
+  background-color: #A9A9A9;
+}
+
+.status-maintenance-color {
+  color: #A9A9A9;
+}
+
+.status-maintenance-background-subtle {
+  background-color: rgba(169, 169, 169, 0.08);
+}
+
+.status-maintenance-border-subtle {
+  border-color: rgba(169, 169, 169, 0.10);
+}
+
 .badge-status-dead.badge-default {
   background-image: url("/assets/images/badges/icon-dead-default.svg");
 }
diff --git a/src/aggregator/manager.rs b/src/aggregator/manager.rs
index 687c836..4fdc1d9 100644
--- a/src/aggregator/manager.rs
+++ b/src/aggregator/manager.rs
@@ -183,9 +183,11 @@ fn scan_and_bump_states() -> Option {
                     probe_id, node_id, replica_id, replica_status
                 );
 
-                // Append bumped replica path?
-                if replica_status == Status::Dead {
-                    bumped_replicas.push(format!("{}:{}:{}", probe_id, node_id, replica_id));
+                if probe.status != Status::Maintenance {
+                    // Append bumped replica path?
+                    if replica_status == Status::Dead {
+                        bumped_replicas.push(format!("{}:{}:{}", probe_id, node_id, replica_id));
+                    }
                 }
 
                 replica.status = replica_status;
@@ -204,17 +206,19 @@ fn scan_and_bump_states() -> Option {
             node.status = node_status;
         }
 
-        // Bump general status with worst node status?
-        if let Some(worst_status) = check_child_status(&general_status, &probe_status) {
-            general_status = worst_status;
-        }
-
         debug!(
             "aggregated status for probe: {} => {:?}",
             probe_id, probe_status
         );
 
-        probe.status = probe_status;
+        if probe.status != Status::Maintenance {
+            // Bump general status with worst node status?
+            if let Some(worst_status) = check_child_status(&general_status, &probe_status) {
+                general_status = worst_status;
+            }
+
+            probe.status = probe_status;
+        }
     }
 
     // Check if general status has changed
diff --git a/src/config/logger.rs b/src/config/logger.rs
index 21e67b5..034c918 100644
--- a/src/config/logger.rs
+++ b/src/config/logger.rs
@@ -6,6 +6,10 @@
 use log;
 use log::{Level, LevelFilter, Metadata, Record, SetLoggerError};
+use std::time::SystemTime;
+
+use chrono::offset::Utc;
+use chrono::DateTime;
 
 pub struct ConfigLogger;
@@ -15,8 +19,15 @@ impl log::Log for ConfigLogger {
     }
 
     fn log(&self, record: &Record) {
+        let system_time = SystemTime::now();
+        let datetime: DateTime<Utc> = system_time.into();
         if self.enabled(record.metadata()) {
-            println!("({}) - {}", record.level(), record.args());
+            println!(
+                "{} ({}) - {}",
+                datetime.format("%Y-%m-%d %T"),
+                record.level(),
+                record.args()
+            );
         }
     }
diff --git a/src/notifier/pushover.rs b/src/notifier/pushover.rs
index 095aeb8..cf6dc58 100644
--- a/src/notifier/pushover.rs
+++ b/src/notifier/pushover.rs
@@ -121,5 +121,6 @@ fn status_to_color(status: &Status) -> &'static str {
         &Status::Healthy => "#54A158",
         &Status::Sick => "#D5A048",
         &Status::Dead => "#C4291C",
+        &Status::Maintenance => "#A9A9A9",
     }
 }
diff --git a/src/notifier/slack.rs b/src/notifier/slack.rs
index 5c4b2a3..e1f178b 100644
--- a/src/notifier/slack.rs
+++ b/src/notifier/slack.rs
@@ -149,5 +149,6 @@ fn status_to_color(status: &Status) -> &'static str {
         &Status::Healthy => "good",
         &Status::Sick => "warning",
         &Status::Dead => "danger",
+        &Status::Maintenance => "warning",
     }
 }
diff --git a/src/notifier/zulip.rs b/src/notifier/zulip.rs
index c83da8f..518c42f 100644
--- a/src/notifier/zulip.rs
+++ b/src/notifier/zulip.rs
@@ -42,6 +42,7 @@ impl GenericNotifier for ZulipNotifier {
             Status::Dead => " *dead* :boom:",
             Status::Healthy => " *healthy* :check_mark:",
             Status::Sick => " *sick* :sick:",
+            Status::Maintenance => " *maintenance* :sick:",
         };
 
         // Build message
diff --git a/src/prober/manager.rs b/src/prober/manager.rs
index 32e6f12..f4468d6 100644
--- a/src/prober/manager.rs
+++ b/src/prober/manager.rs
@@ -116,28 +116,30 @@ fn map_poll_replicas() -> Vec {
 
     // Map replica URLs to be probed
     for (probe_id, probe) in states.probes.iter() {
-        for (node_id, node) in probe.nodes.iter() {
-            if node.mode == Mode::Poll {
-                for (replica_id, replica) in node.replicas.iter() {
-                    if let Some(ref replica_url) = replica.url {
-                        // Clone values to scan; this ensure the write lock is not held while \
-                        //   the replica scan is performed. As this whole operation can take time, \
-                        //   it could lock all the pipelines depending on the shared store data \
-                        //   (eg. the reporter HTTP API).
-                        replica_list.push(ProbeReplica::Poll(
-                            ProbeReplicaTarget {
-                                probe_id: probe_id.to_owned(),
-                                node_id: node_id.to_owned(),
-                                replica_id: replica_id.to_owned(),
-                            },
-                            ProbeReplicaPoll {
-                                replica_url: replica_url.to_owned(),
-                                http_headers: node.http_headers.to_owned(),
-                                http_method: node.http_method.to_owned(),
-                                http_body: node.http_body.to_owned(),
-                                body_match: node.http_body_healthy_match.to_owned(),
-                            },
-                        ));
+        if probe.status != Status::Maintenance {
+            for (node_id, node) in probe.nodes.iter() {
+                if node.mode == Mode::Poll {
+                    for (replica_id, replica) in node.replicas.iter() {
+                        if let Some(ref replica_url) = replica.url {
+                            // Clone values to scan; this ensure the write lock is not held while \
+                            //   the replica scan is performed. As this whole operation can take time, \
+                            //   it could lock all the pipelines depending on the shared store data \
+                            //   (eg. the reporter HTTP API).
+                            replica_list.push(ProbeReplica::Poll(
+                                ProbeReplicaTarget {
+                                    probe_id: probe_id.to_owned(),
+                                    node_id: node_id.to_owned(),
+                                    replica_id: replica_id.to_owned(),
+                                },
+                                ProbeReplicaPoll {
+                                    replica_url: replica_url.to_owned(),
+                                    http_headers: node.http_headers.to_owned(),
+                                    http_method: node.http_method.to_owned(),
+                                    http_body: node.http_body.to_owned(),
+                                    body_match: node.http_body_healthy_match.to_owned(),
+                                },
+                            ));
+                        }
                     }
                 }
             }
@@ -155,22 +157,24 @@ fn map_script_replicas() -> Vec {
 
     // Map scripts to be probed
     for (probe_id, probe) in states.probes.iter() {
-        for (node_id, node) in probe.nodes.iter() {
-            if node.mode == Mode::Script {
-                for (replica_id, replica) in node.replicas.iter() {
-                    if let Some(ref replica_script) = replica.script {
-                        // Clone values to scan; this ensure the write lock is not held while \
-                        //   the script execution is performed. Same as in `map_poll_replicas()`.
-                        replica_list.push(ProbeReplica::Script(
-                            ProbeReplicaTarget {
-                                probe_id: probe_id.to_owned(),
-                                node_id: node_id.to_owned(),
-                                replica_id: replica_id.to_owned(),
-                            },
-                            ProbeReplicaScript {
-                                script: replica_script.to_owned(),
-                            },
-                        ));
+        if probe.status != Status::Maintenance {
+            for (node_id, node) in probe.nodes.iter() {
+                if node.mode == Mode::Script {
+                    for (replica_id, replica) in node.replicas.iter() {
+                        if let Some(ref replica_script) = replica.script {
+                            // Clone values to scan; this ensure the write lock is not held while \
+                            //   the script execution is performed. Same as in `map_poll_replicas()`.
+                            replica_list.push(ProbeReplica::Script(
+                                ProbeReplicaTarget {
+                                    probe_id: probe_id.to_owned(),
+                                    node_id: node_id.to_owned(),
+                                    replica_id: replica_id.to_owned(),
+                                },
+                                ProbeReplicaScript {
+                                    script: replica_script.to_owned(),
+                                },
+                            ));
+                        }
                     }
                 }
            }
diff --git a/src/prober/status.rs b/src/prober/status.rs
index c49e111..67a3e25 100644
--- a/src/prober/status.rs
+++ b/src/prober/status.rs
@@ -14,6 +14,9 @@ pub enum Status {
 
     #[serde(rename = "dead")]
     Dead,
+
+    #[serde(rename = "maintenance")]
+    Maintenance,
 }
 
 impl Status {
@@ -22,6 +25,7 @@ impl Status {
             &Status::Healthy => "healthy",
             &Status::Sick => "sick",
             &Status::Dead => "dead",
+            &Status::Maintenance => "maintenance",
         }
     }
 
@@ -30,6 +34,7 @@ impl Status {
             &Status::Dead => "\u{274c}",
             &Status::Sick => "\u{26a0}",
             &Status::Healthy => "\u{2705}",
+            &Status::Maintenance => "\u{1F6A7}",
         }
     }
 }
diff --git a/src/responder/manager.rs b/src/responder/manager.rs
index 400ad63..ac866c6 100644
--- a/src/responder/manager.rs
+++ b/src/responder/manager.rs
@@ -64,6 +64,18 @@ pub fn run() {
                     .guard(guard::Delete())
                     .to(routes::reporter_flush),
             )
+            .service(
+                web::resource("/probe/start_planned_maintenance/{probe_id}")
+                    .wrap(middleware_auth.clone())
+                    .guard(guard::Post())
+                    .to(routes::start_planned_maintenance),
+            )
+            .service(
+                web::resource("/probe/stop_planned_maintenance/{probe_id}")
+                    .wrap(middleware_auth.clone())
+                    .guard(guard::Post())
+                    .to(routes::stop_planned_maintenance),
+            )
     })
     .workers(APP_CONF.server.workers)
     .bind(APP_CONF.server.inet)
diff --git a/src/responder/routes.rs b/src/responder/routes.rs
index 512dc78..58b20f3 100644
--- a/src/responder/routes.rs
+++ b/src/responder/routes.rs
@@ -16,6 +16,7 @@ use crate::prober::report::{
     handle_load as handle_load_report, HandleFlushError, HandleHealthError, HandleLoadError,
 };
 use crate::APP_CONF;
+use crate::prober::status::Status;
 
 #[get("/")]
 async fn index(tera: Data<Tera>) -> HttpResponse {
@@ -86,6 +87,32 @@ async fn assets_javascripts(web::Path(file): web::Path<String>) -> Option<NamedFile>
+pub async fn start_planned_maintenance(web::Path(probe_id): web::Path<String>) -> HttpResponse {
+    let store = &mut PROBER_STORE.write().unwrap();
+    if let Some(ref mut probe) = store.states.probes.get_mut(&probe_id) {
+        probe.status = Status::Maintenance;
+        info!("Starting planned maintenance for probe: {:?}. Notifications will be suppressed for this probe.", probe_id);
+        HttpResponse::Ok().finish()
+    } else {
+        HttpResponse::BadRequest().body(format!("Could not find service named '{}'", probe_id))
+    }
+}
+
+pub async fn stop_planned_maintenance(web::Path(probe_id): web::Path<String>) -> HttpResponse {
+    let store = &mut PROBER_STORE.write().unwrap();
+    if let Some(ref mut probe) = store.states.probes.get_mut(&probe_id) {
+        if probe.status == Status::Maintenance {
+            probe.status = Status::Healthy;
+            info!("Stopping planned maintenance for probe: {:?}", probe_id);
+            HttpResponse::Ok().finish()
+        } else {
+            HttpResponse::BadRequest().body(format!("ERROR: Service is not currently set to status maintenance: {:?}", probe_id))
+        }
+    } else {
+        HttpResponse::BadRequest().body(format!("Could not find service named '{}'", probe_id))
+    }
+}
+
 // Notice: reporter report route is managed in manager due to authentication needs
 pub async fn reporter_report(
     web::Path((probe_id, node_id)): web::Path<(String, String)>,
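
Usage sketch (not part of the patch above): the two new responder routes can be driven with any HTTP client. The snippet below is illustrative only and makes several assumptions: that the responder listens on localhost:8080 (i.e. whatever `server.inet` is configured to), that the routes answer to the same HTTP Basic guard as the existing reporter routes (empty username, the configured token as password), that a probe with the identifier `web` exists, and that `reqwest` with the `blocking` feature is available as a helper crate, which it is not in this project's Cargo.toml.

use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
    let client = reqwest::blocking::Client::new();

    // Enter maintenance: the probe stops being polled/executed, is excluded from
    // status aggregation, and (per the patch) its notifications are suppressed.
    // The probe id "web" and the token are placeholders.
    let start = client
        .post("http://localhost:8080/probe/start_planned_maintenance/web")
        .basic_auth("", Some("REPLACE_WITH_TOKEN"))
        .send()?;
    assert!(start.status().is_success());

    // ... perform the planned maintenance work here ...

    // Leave maintenance: the probe is reset to healthy and re-assessed on the next scan.
    let stop = client
        .post("http://localhost:8080/probe/stop_planned_maintenance/web")
        .basic_auth("", Some("REPLACE_WITH_TOKEN"))
        .send()?;
    assert!(stop.status().is_success());

    Ok(())
}

Note that `stop_planned_maintenance` only resets the status when the probe is currently in maintenance; otherwise it answers 400 Bad Request, so a stray or duplicate call is surfaced to the operator rather than silently accepted.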