
Commit

added apis for planned maintenance mode w/ burnettk
jasquat committed Apr 19, 2022
1 parent 54ea852 commit d2a6959
Showing 14 changed files with 163 additions and 48 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -44,6 +44,7 @@ run_script = "0.9"
lettre = { version = "0.9", features = ["smtp-transport"], optional = true }
lettre_email = { version = "0.9", optional = true }
libstrophe = { version = "0.16", default-features = false, optional = true }
chrono = "0.4"

[features]
default = ["notifier-email", "notifier-twilio", "notifier-slack", "notifier-zulip", "notifier-telegram", "notifier-pushover", "notifier-gotify", "notifier-matrix", "notifier-webhook"]
7 changes: 7 additions & 0 deletions res/assets/images/badges/color-maintenance-default.svg
19 changes: 19 additions & 0 deletions res/assets/images/badges/icon-maintenance-default.svg
21 changes: 21 additions & 0 deletions res/assets/stylesheets/common.css
@@ -243,6 +243,27 @@ a:hover {
border-color: rgba(241, 128, 0, 0.17);
}

.badge-status-maintenance.badge-default {
background-image: url("/assets/images/badges/icon-maintenance-default.svg");
}

.status-maintenance-background,
.status-maintenance-background-before:before {
background-color: #A9A9A9;
}

.status-maintenance-color {
color: #A9A9A9;
}

.status-maintenance-background-subtle {
background-color: rgba(169, 169, 169, 0.08);
}

.status-maintenance-border-subtle {
border-color: rgba(169, 169, 169, 0.10);
}

.badge-status-dead.badge-default {
background-image: url("/assets/images/badges/icon-dead-default.svg");
}
22 changes: 13 additions & 9 deletions src/aggregator/manager.rs
@@ -183,9 +183,11 @@ fn scan_and_bump_states() -> Option<BumpedStates> {
probe_id, node_id, replica_id, replica_status
);

// Append bumped replica path?
if replica_status == Status::Dead {
bumped_replicas.push(format!("{}:{}:{}", probe_id, node_id, replica_id));
if probe.status != Status::Maintenance {
// Append bumped replica path?
if replica_status == Status::Dead {
bumped_replicas.push(format!("{}:{}:{}", probe_id, node_id, replica_id));
}
}

replica.status = replica_status;
@@ -204,17 +206,19 @@ fn scan_and_bump_states() -> Option<BumpedStates> {
node.status = node_status;
}

// Bump general status with worst node status?
if let Some(worst_status) = check_child_status(&general_status, &probe_status) {
general_status = worst_status;
}

debug!(
"aggregated status for probe: {} => {:?}",
probe_id, probe_status
);

probe.status = probe_status;
if probe.status != Status::Maintenance {
// Bump general status with worst node status?
if let Some(worst_status) = check_child_status(&general_status, &probe_status) {
general_status = worst_status;
}

probe.status = probe_status;
}
}

// Check if general status has changed
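
The net effect of these two guards: while a probe is parked in maintenance, its replicas are still scanned and logged, but a dead replica is no longer pushed onto the bump list, and the probe no longer drags `general_status` down. A condensed sketch of the replica-bump decision, with hypothetical names (not code from this commit):

```rust
#[derive(PartialEq)]
enum Status { Healthy, Sick, Dead, Maintenance }

// Hypothetical condensation of the guard added above: a dead replica only
// counts as "bumped" when its probe is not under planned maintenance.
fn should_bump_replica(probe_status: &Status, replica_status: &Status) -> bool {
    *probe_status != Status::Maintenance && *replica_status == Status::Dead
}
```
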
13 changes: 12 additions & 1 deletion src/config/logger.rs
@@ -6,6 +6,10 @@

use log;
use log::{Level, LevelFilter, Metadata, Record, SetLoggerError};
use std::time::SystemTime;

use chrono::offset::Utc;
use chrono::DateTime;

pub struct ConfigLogger;

@@ -15,8 +19,15 @@ impl log::Log for ConfigLogger {
}

fn log(&self, record: &Record) {
let system_time = SystemTime::now();
let datetime: DateTime<Utc> = system_time.into();
if self.enabled(record.metadata()) {
println!("({}) - {}", record.level(), record.args());
println!(
"{} ({}) - {}",
datetime.format("%Y-%m-%d %T"),
record.level(),
record.args()
);
}
}

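
The `chrono` dependency added to Cargo.toml exists solely for this timestamp. A minimal standalone sketch of the same conversion and format string (`%T` is chrono shorthand for `%H:%M:%S`):

```rust
use std::time::SystemTime;

use chrono::offset::Utc;
use chrono::DateTime;

fn main() {
    // Same conversion as the logger above: SystemTime -> DateTime<Utc>.
    let datetime: DateTime<Utc> = SystemTime::now().into();
    // Prints e.g. "2022-04-19 17:42:05 (INFO) - starting up"
    println!("{} (INFO) - starting up", datetime.format("%Y-%m-%d %T"));
}
```
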
1 change: 1 addition & 0 deletions src/notifier/pushover.rs
@@ -121,5 +121,6 @@ fn status_to_color(status: &Status) -> &'static str {
&Status::Healthy => "#54A158",
&Status::Sick => "#D5A048",
&Status::Dead => "#C4291C",
&Status::Maintenance => "#A9A9A9",
}
}
1 change: 1 addition & 0 deletions src/notifier/slack.rs
@@ -149,5 +149,6 @@ fn status_to_color(status: &Status) -> &'static str {
&Status::Healthy => "good",
&Status::Sick => "warning",
&Status::Dead => "danger",
&Status::Maintenance => "warning",
}
}
1 change: 1 addition & 0 deletions src/notifier/zulip.rs
@@ -42,6 +42,7 @@ impl GenericNotifier for ZulipNotifier {
Status::Dead => " *dead* :boom:",
Status::Healthy => " *healthy* :check_mark:",
Status::Sick => " *sick* :sick:",
Status::Maintenance => " *maintenance* :sick:",
};

// Build message
80 changes: 42 additions & 38 deletions src/prober/manager.rs
@@ -116,28 +116,30 @@ fn map_poll_replicas() -> Vec<ProbeReplica> {

// Map replica URLs to be probed
for (probe_id, probe) in states.probes.iter() {
for (node_id, node) in probe.nodes.iter() {
if node.mode == Mode::Poll {
for (replica_id, replica) in node.replicas.iter() {
if let Some(ref replica_url) = replica.url {
// Clone values to scan; this ensures the write lock is not held while \
// the replica scan is performed. As this whole operation can take time, \
// it could lock all the pipelines depending on the shared store data \
// (eg. the reporter HTTP API).
replica_list.push(ProbeReplica::Poll(
ProbeReplicaTarget {
probe_id: probe_id.to_owned(),
node_id: node_id.to_owned(),
replica_id: replica_id.to_owned(),
},
ProbeReplicaPoll {
replica_url: replica_url.to_owned(),
http_headers: node.http_headers.to_owned(),
http_method: node.http_method.to_owned(),
http_body: node.http_body.to_owned(),
body_match: node.http_body_healthy_match.to_owned(),
},
));
if probe.status != Status::Maintenance {
for (node_id, node) in probe.nodes.iter() {
if node.mode == Mode::Poll {
for (replica_id, replica) in node.replicas.iter() {
if let Some(ref replica_url) = replica.url {
// Clone values to scan; this ensures the write lock is not held while \
// the replica scan is performed. As this whole operation can take time, \
// it could lock all the pipelines depending on the shared store data \
// (eg. the reporter HTTP API).
replica_list.push(ProbeReplica::Poll(
ProbeReplicaTarget {
probe_id: probe_id.to_owned(),
node_id: node_id.to_owned(),
replica_id: replica_id.to_owned(),
},
ProbeReplicaPoll {
replica_url: replica_url.to_owned(),
http_headers: node.http_headers.to_owned(),
http_method: node.http_method.to_owned(),
http_body: node.http_body.to_owned(),
body_match: node.http_body_healthy_match.to_owned(),
},
));
}
}
}
}
@@ -155,22 +157,24 @@ fn map_script_replicas() -> Vec<ProbeReplica> {

// Map scripts to be probed
for (probe_id, probe) in states.probes.iter() {
for (node_id, node) in probe.nodes.iter() {
if node.mode == Mode::Script {
for (replica_id, replica) in node.replicas.iter() {
if let Some(ref replica_script) = replica.script {
// Clone values to scan; this ensures the write lock is not held while \
// the script execution is performed. Same as in `map_poll_replicas()`.
replica_list.push(ProbeReplica::Script(
ProbeReplicaTarget {
probe_id: probe_id.to_owned(),
node_id: node_id.to_owned(),
replica_id: replica_id.to_owned(),
},
ProbeReplicaScript {
script: replica_script.to_owned(),
},
));
if probe.status != Status::Maintenance {
for (node_id, node) in probe.nodes.iter() {
if node.mode == Mode::Script {
for (replica_id, replica) in node.replicas.iter() {
if let Some(ref replica_script) = replica.script {
// Clone values to scan; this ensures the write lock is not held while \
// the script execution is performed. Same as in `map_poll_replicas()`.
replica_list.push(ProbeReplica::Script(
ProbeReplicaTarget {
probe_id: probe_id.to_owned(),
node_id: node_id.to_owned(),
replica_id: replica_id.to_owned(),
},
ProbeReplicaScript {
script: replica_script.to_owned(),
},
));
}
}
}
}
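
Both mappers gained the same wrapper: probes in maintenance are skipped wholesale, so neither HTTP polls nor scripts run against them. A flatter guard-clause formulation of the same check, using trimmed stand-in types (a stylistic alternative, not what the commit does):

```rust
use std::collections::HashMap;

// Stand-ins for illustration only; the real types live in src/prober/.
#[derive(PartialEq)]
enum Status { Healthy, Maintenance }
struct Probe { status: Status }

// Equivalent early-skip: probes under planned maintenance are never probed.
fn probes_to_scan(probes: &HashMap<String, Probe>) -> Vec<&String> {
    let mut out = Vec::new();
    for (probe_id, probe) in probes {
        if probe.status == Status::Maintenance {
            continue;
        }
        out.push(probe_id);
    }
    out
}
```
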
5 changes: 5 additions & 0 deletions src/prober/status.rs
@@ -14,6 +14,9 @@ pub enum Status {

#[serde(rename = "dead")]
Dead,

#[serde(rename = "maintenance")]
Maintenance,
}

impl Status {
@@ -22,6 +25,7 @@ impl Status {
&Status::Healthy => "healthy",
&Status::Sick => "sick",
&Status::Dead => "dead",
&Status::Maintenance => "maintenance",
}
}

@@ -30,6 +34,7 @@ impl Status {
&Status::Dead => "\u{274c}",
&Status::Sick => "\u{26a0}",
&Status::Healthy => "\u{2705}",
&Status::Maintenance => "\u{1F6A7}",
}
}
}
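
A quick serde round-trip for the new variant, using a trimmed stand-in enum that mirrors the rename attributes above (the real enum lives in src/prober/status.rs and is assumed to derive Serialize/Deserialize the same way):

```rust
use serde::{Deserialize, Serialize};

// Trimmed stand-in for illustration; only two of the four variants shown.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum Status {
    #[serde(rename = "healthy")]
    Healthy,
    #[serde(rename = "maintenance")]
    Maintenance,
}

fn main() {
    let json = serde_json::to_string(&Status::Maintenance).unwrap();
    assert_eq!(json, r#""maintenance""#); // the rename controls the wire format
    let back: Status = serde_json::from_str(&json).unwrap();
    assert_eq!(back, Status::Maintenance);
}
```
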
12 changes: 12 additions & 0 deletions src/responder/manager.rs
@@ -64,6 +64,18 @@ pub fn run() {
.guard(guard::Delete())
.to(routes::reporter_flush),
)
.service(
web::resource("/probe/start_planned_maintenance/{probe_id}")
.wrap(middleware_auth.clone())
.guard(guard::Post())
.to(routes::start_planned_maintenance),
)
.service(
web::resource("/probe/stop_planned_maintenance/{probe_id}")
.wrap(middleware_auth.clone())
.guard(guard::Post())
.to(routes::stop_planned_maintenance),
)
})
.workers(APP_CONF.server.workers)
.bind(APP_CONF.server.inet)
27 changes: 27 additions & 0 deletions src/responder/routes.rs
@@ -16,6 +16,7 @@ use crate::prober::report::{
handle_load as handle_load_report, HandleFlushError, HandleHealthError, HandleLoadError,
};
use crate::APP_CONF;
use crate::prober::status::Status;

#[get("/")]
async fn index(tera: Data<Tera>) -> HttpResponse {
@@ -86,6 +87,32 @@ async fn assets_javascripts(web::Path(file): web::Path<String>) -> Option<NamedFile>
NamedFile::open(APP_CONF.assets.path.join("javascripts").join(file)).ok()
}

pub async fn start_planned_maintenance(web::Path(probe_id): web::Path<String>) -> HttpResponse {
let store = &mut PROBER_STORE.write().unwrap();
if let Some(ref mut probe) = store.states.probes.get_mut(&probe_id) {
probe.status = Status::Maintenance;
info!("Starting planned maintenance for probe: {:?}. Notifications will be suppressed for this probe.", probe_id);
HttpResponse::Ok().finish()
} else {
HttpResponse::BadRequest().body(format!("Could not find service named '{}'", probe_id))
}
}

pub async fn stop_planned_maintenance(web::Path(probe_id): web::Path<String>) -> HttpResponse {
let store = &mut PROBER_STORE.write().unwrap();
if let Some(ref mut probe) = store.states.probes.get_mut(&probe_id) {
if probe.status == Status::Maintenance {
probe.status = Status::Healthy;
info!("Stopping planned maintenance for probe: {:?}", probe_id);
HttpResponse::Ok().finish()
} else {
HttpResponse::BadRequest().body(format!("ERROR: Service is not currently set to status maintenance: {:?}", probe_id))
}
} else {
HttpResponse::BadRequest().body(format!("Could not find service named '{}'", probe_id))
}
}

// Notice: reporter report route is managed in manager due to authentication needs
pub async fn reporter_report(
web::Path((probe_id, node_id)): web::Path<(String, String)>,
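
Putting the two endpoints together, a hedged client-side sketch; the host, port, probe id, and auth token here are assumptions, not part of this diff (the routes are registered behind the auth middleware in src/responder/manager.rs):

```rust
// Hypothetical usage of the new maintenance API via reqwest's blocking client.
fn main() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    let base = "http://localhost:8080"; // assumed responder address

    // Enter planned maintenance: the probe flips to Status::Maintenance and
    // notifications for it are suppressed.
    client
        .post(format!("{}/probe/start_planned_maintenance/web", base))
        .basic_auth("", Some("SECRET_TOKEN")) // assumed credentials
        .send()?
        .error_for_status()?;

    // Leave planned maintenance: the handler resets the probe to healthy.
    client
        .post(format!("{}/probe/stop_planned_maintenance/web", base))
        .basic_auth("", Some("SECRET_TOKEN"))
        .send()?
        .error_for_status()?;

    Ok(())
}
```

Note that `stop_planned_maintenance` resets the probe straight to `Status::Healthy` rather than recomputing it; the next aggregation pass presumably settles the probe back to its true status from replica data.
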
