Skip to content

Commit

Permalink
feat: Add health checks to run before starting services (#242)
Browse files Browse the repository at this point in the history
This is a separate trait, vs adding a "health check" method to
`AppService`, to allow defining health checks that apply to multiple
services. For example, most services would require the DB and Redis
connections to be valid, so we would want to perform a check for these
resources a single time before starting any service instead of once for
every service that needs the resources.

Another benefit of using a separate trait is, because the health checks
are decoupled from services, they can potentially be used in other parts
of the app. For example, they could be used to implement a "health
check" API endpoint.

For now, I think it makes sense to register health checks on the
`ServiceRegistry` since the checks are run right before running the
services, and the checks need to succeed in order to run the services.
In the future, it may make more sense to add a new method to the `App`
trait in order to register health checks. In general, however, I'm
trying to steer away from adding too many methods to the `App` trait.

Closes #237
  • Loading branch information
spencewenski authored Jun 23, 2024
1 parent 46c9611 commit f885976
Show file tree
Hide file tree
Showing 19 changed files with 437 additions and 20 deletions.
33 changes: 22 additions & 11 deletions src/api/core/health.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,19 @@ where
let timer = Instant::now();

#[cfg(any(feature = "db-sql", feature = "sidekiq"))]
let timeout_duration = Duration::from_secs(1);
let timeout_duration = Some(Duration::from_secs(1));

#[cfg(all(feature = "db-sql", feature = "sidekiq"))]
let (db, (redis_enqueue, redis_fetch)) = tokio::join!(
db_health(state, timeout_duration),
all_redis_health(state, timeout_duration)
all_sidekiq_redis_health(state, timeout_duration)
);

#[cfg(all(feature = "db-sql", not(feature = "sidekiq")))]
let db = db_health(state, timeout_duration).await;

#[cfg(all(not(feature = "db-sql"), feature = "sidekiq"))]
let (redis_enqueue, redis_fetch) = all_redis_health(state, timeout_duration).await;
let (redis_enqueue, redis_fetch) = all_sidekiq_redis_health(state, timeout_duration).await;

Ok(HeathCheckResponse {
latency: timer.elapsed().as_millis(),
Expand All @@ -107,7 +107,10 @@ where
}

#[cfg(feature = "db-sql")]
pub(crate) async fn db_health<S>(state: &AppContext<S>, duration: Duration) -> ResourceHealth
pub(crate) async fn db_health<S>(
state: &AppContext<S>,
duration: Option<Duration>,
) -> ResourceHealth
where
S: Clone + Send + Sync + 'static,
{
Expand All @@ -129,15 +132,19 @@ where

#[cfg(feature = "db-sql")]
#[instrument(skip_all)]
async fn ping_db(db: &DatabaseConnection, duration: Duration) -> RoadsterResult<()> {
timeout(duration, db.ping()).await??;
async fn ping_db(db: &DatabaseConnection, duration: Option<Duration>) -> RoadsterResult<()> {
if let Some(duration) = duration {
timeout(duration, db.ping()).await??;
} else {
db.ping().await?;
}
Ok(())
}

#[cfg(feature = "sidekiq")]
pub(crate) async fn all_redis_health<S>(
pub(crate) async fn all_sidekiq_redis_health<S>(
state: &AppContext<S>,
duration: Duration,
duration: Option<Duration>,
) -> (ResourceHealth, Option<ResourceHealth>)
where
S: Clone + Send + Sync + 'static,
Expand All @@ -156,7 +163,7 @@ where

#[cfg(feature = "sidekiq")]
#[instrument(skip_all)]
async fn redis_health(redis: &sidekiq::RedisPool, duration: Duration) -> ResourceHealth {
async fn redis_health(redis: &sidekiq::RedisPool, duration: Option<Duration>) -> ResourceHealth {
let redis_timer = Instant::now();
let (redis_status, acquire_conn_latency, ping_latency) = match ping_redis(redis, duration).await
{
Expand All @@ -182,10 +189,14 @@ async fn redis_health(redis: &sidekiq::RedisPool, duration: Duration) -> Resourc
#[instrument(skip_all)]
async fn ping_redis(
redis: &sidekiq::RedisPool,
duration: Duration,
duration: Option<Duration>,
) -> RoadsterResult<(Duration, Duration)> {
let timer = Instant::now();
let mut conn = timeout(duration, redis.get()).await??;
let mut conn = if let Some(duration) = duration {
timeout(duration, redis.get()).await??
} else {
redis.get().await?
};
let acquire_conn_latency = timer.elapsed();

let timer = Instant::now();
Expand Down
2 changes: 2 additions & 0 deletions src/app/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ where
A::M::up(context.db(), None).await?;
}

crate::service::runner::health_checks(&service_registry, &context).await?;

crate::service::runner::before_run(&service_registry, &context).await?;

crate::service::runner::run(service_registry, &context).await?;
Expand Down
5 changes: 5 additions & 0 deletions src/config/app_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::config::auth::Auth;
#[cfg(feature = "db-sql")]
use crate::config::database::Database;
use crate::config::environment::{Environment, ENVIRONMENT_ENV_VAR_NAME};
use crate::config::health_check::HealthCheck;
use crate::config::service::Service;
use crate::config::tracing::Tracing;
use crate::error::RoadsterResult;
Expand All @@ -25,6 +26,8 @@ pub struct AppConfig {
#[validate(nested)]
pub app: App,
#[validate(nested)]
pub health_check: HealthCheck,
#[validate(nested)]
pub service: Service,
#[validate(nested)]
pub auth: Auth,
Expand Down Expand Up @@ -168,6 +171,8 @@ impl AppConfig {
#[cfg(feature = "sidekiq")]
let config = config.add_source(crate::config::service::worker::sidekiq::default_config());

let config = config.add_source(crate::config::health_check::default_config());

config
}

Expand Down
3 changes: 3 additions & 0 deletions src/config/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ shutdown-on-error = true

[service]
default-enable = true

[health-check]
default-enable = true
3 changes: 3 additions & 0 deletions src/config/health_check/default.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[health-check.database]

[health-check.sidekiq]
82 changes: 82 additions & 0 deletions src/config/health_check/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use crate::app::context::AppContext;
use crate::util::serde_util::default_true;
use config::{FileFormat, FileSourceString};
use serde_derive::{Deserialize, Serialize};
use validator::Validate;

/// Builds the config source that carries the default health check settings.
///
/// The settings come from the `default.toml` file located next to this module. Its contents
/// are embedded at compile time via `include_str!` and handed to `config` as TOML.
pub fn default_config() -> config::File<FileSourceString, FileFormat> {
    let embedded_defaults = include_str!("default.toml");
    config::File::from_str(embedded_defaults, FileFormat::Toml)
}

/// Health check section of the app config.
///
/// Holds the app-wide default for whether health checks are enabled, plus one entry per
/// built-in health check. Each per-check entry only exists when the matching cargo feature
/// is compiled in.
#[derive(Debug, Clone, Validate, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[non_exhaustive]
pub struct HealthCheck {
/// Fallback used by [`CommonConfig::enabled`] for checks that do not set `enable`
/// themselves. When the key is absent, serde fills it in by calling `default_true`.
#[serde(default = "default_true")]
pub default_enable: bool,
/// Settings for the database health check (only with the `db-sql` feature).
#[cfg(feature = "db-sql")]
pub database: HealthCheckConfig<()>,
/// Settings for the sidekiq health check (only with the `sidekiq` feature).
#[cfg(feature = "sidekiq")]
pub sidekiq: HealthCheckConfig<()>,
}

/// Settings shared by the config of every health check.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[non_exhaustive]
pub struct CommonConfig {
/// Explicit on/off switch for a single health check.
///
/// Optional so we can tell the difference between a consumer explicitly enabling/disabling
/// the health check, vs the health check being enabled/disabled by default.
/// If this is `None`, the value will match the value of [`HealthCheck::default_enable`].
/// A `None` value is left out entirely when serializing.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub enable: Option<bool>,
}

impl CommonConfig {
    /// Reports whether the health check owning this config should run.
    ///
    /// An explicit `enable` value always wins. When it is `None`, the answer is the app-wide
    /// `health_check.default_enable` setting read from the config held by `context`.
    pub fn enabled<S>(&self, context: &AppContext<S>) -> bool {
        let app_wide_default = context.config().health_check.default_enable;
        match self.enable {
            Some(explicit) => explicit,
            None => app_wide_default,
        }
    }
}

/// Config for a single health check: the shared [`CommonConfig`] fields plus any
/// check-specific fields of type `T`.
///
/// Both parts are marked `#[serde(flatten)]`, so their fields are (de)serialized at the same
/// level as this struct rather than under `common`/`custom` keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[non_exhaustive]
pub struct HealthCheckConfig<T> {
/// Fields every health check config has (e.g. `enable`).
#[serde(flatten)]
pub common: CommonConfig,
/// Fields specific to one health check; `()` is used when there are none.
#[serde(flatten)]
pub custom: T,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::app_config::AppConfig;
    use rstest::rstest;

    /// Covers every combination of the app-wide default and the per-check override: an
    /// explicit `enable` wins, otherwise the default is used.
    #[rstest]
    #[case(true, None, true)]
    #[case(true, Some(true), true)]
    #[case(true, Some(false), false)]
    #[case(false, None, false)]
    #[case(false, Some(true), true)]
    #[case(false, Some(false), false)]
    #[cfg_attr(coverage_nightly, coverage(off))]
    fn common_config_enabled(
        #[case] default_enable: bool,
        #[case] enable: Option<bool>,
        #[case] expected_enabled: bool,
    ) {
        // Arrange
        let mut app_config = AppConfig::test(None).unwrap();
        app_config.health_check.default_enable = default_enable;
        let context = AppContext::<()>::test(Some(app_config), None, None).unwrap();
        let under_test = CommonConfig { enable };

        // Act
        let actual_enabled = under_test.enabled(&context);

        // Assert
        assert_eq!(actual_enabled, expected_enabled);
    }
}
1 change: 1 addition & 0 deletions src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ pub mod auth;
#[cfg(feature = "db-sql")]
pub mod database;
pub mod environment;
pub mod health_check;
pub mod service;
pub mod tracing;
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ environment = 'test'
name = 'Test'
shutdown-on-error = true

[health-check]
default-enable = true

[health-check.database]

[health-check.sidekiq]

[service]
default-enable = true

Expand Down
68 changes: 68 additions & 0 deletions src/health_check/database.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use crate::api::core::health::{db_health, Status};
use crate::app::context::AppContext;
use crate::app::App;
use crate::error::RoadsterResult;
use crate::health_check::HealthCheck;
use anyhow::anyhow;
use async_trait::async_trait;
use tracing::instrument;

/// Health check for the app's database connection.
///
/// Its `HealthCheck` implementation reports the name `"db"` and fails when `db_health`
/// returns an error status.
pub struct DatabaseHealthCheck;

#[async_trait]
impl<A: App + 'static> HealthCheck<A> for DatabaseHealthCheck {
    /// Name under which this check is registered: always `"db"`.
    fn name(&self) -> String {
        String::from("db")
    }

    /// Delegates to the module-level `enabled` helper, which reads the database health
    /// check's config from `context`.
    fn enabled(&self, context: &AppContext<A::State>) -> bool {
        enabled(context)
    }

    /// Runs `db_health` without a timeout (`None`) and turns an error status into an `Err`.
    ///
    /// # Errors
    ///
    /// Returns an error describing the failure when the reported status is `Status::Err`.
    #[instrument(skip_all)]
    async fn check(&self, app_context: &AppContext<A::State>) -> RoadsterResult<()> {
        let report = db_health(app_context, None).await;

        match report.status {
            Status::Err(err) => {
                Err(anyhow!("Database connection pool is not healthy: {:?}", err).into())
            }
            _ => Ok(()),
        }
    }
}

/// Reports whether the database health check is enabled for the given context.
///
/// Looks up the `health_check.database` section of the app config and lets its common
/// settings decide, which fall back to the app-wide default when `enable` is unset.
fn enabled<S>(context: &AppContext<S>) -> bool {
    let database_check = &context.config().health_check.database;
    database_check.common.enabled(context)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::app_config::AppConfig;
    use rstest::rstest;

    /// With the app-wide default switched off, the explicit per-check `enable` value alone
    /// decides whether the database health check is enabled.
    #[rstest]
    #[case(false, Some(true), true)]
    #[case(false, Some(false), false)]
    #[cfg_attr(coverage_nightly, coverage(off))]
    fn enabled(
        #[case] default_enable: bool,
        #[case] enable: Option<bool>,
        #[case] expected_enabled: bool,
    ) {
        // Arrange
        let mut app_config = AppConfig::test(None).unwrap();
        {
            let health_check = &mut app_config.health_check;
            health_check.default_enable = default_enable;
            health_check.database.common.enable = enable;
        }
        let context = AppContext::<()>::test(Some(app_config), None, None).unwrap();

        // Act
        // `super::` is required: this test function shadows the helper of the same name.
        let actual_enabled = super::enabled(&context);

        // Assert
        assert_eq!(actual_enabled, expected_enabled);
    }
}
60 changes: 60 additions & 0 deletions src/health_check/default.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use crate::app::context::AppContext;
use crate::app::App;
#[cfg(feature = "db-sql")]
use crate::health_check::database::DatabaseHealthCheck;
#[cfg(feature = "sidekiq")]
use crate::health_check::sidekiq::SidekiqHealthCheck;
use crate::health_check::HealthCheck;
use std::collections::BTreeMap;

/// Collects the built-in health checks that are enabled for the given context.
///
/// The candidate list depends on the compiled cargo features (`db-sql`, `sidekiq`). Each
/// candidate is asked whether it is enabled, and the enabled ones are returned keyed by their
/// name.
pub fn default_health_checks<A: App + 'static>(
    context: &AppContext<A::State>,
) -> BTreeMap<String, Box<dyn HealthCheck<A>>> {
    let candidates: Vec<Box<dyn HealthCheck<A>>> = vec![
        #[cfg(feature = "db-sql")]
        Box::new(DatabaseHealthCheck),
        #[cfg(feature = "sidekiq")]
        Box::new(SidekiqHealthCheck),
    ];

    let mut enabled_checks = BTreeMap::new();
    for candidate in candidates {
        if candidate.enabled(context) {
            enabled_checks.insert(candidate.name(), candidate);
        }
    }
    enabled_checks
}

// Only compiled for test builds that have both the `sidekiq` and `db-sql` features enabled.
#[cfg(all(test, feature = "sidekiq", feature = "db-sql",))]
mod tests {
use crate::app::context::AppContext;
use crate::app::MockApp;
use crate::config::app_config::AppConfig;
use crate::util::test_util::TestCase;
use insta::assert_toml_snapshot;
use itertools::Itertools;
use rstest::{fixture, rstest};

// Fixture providing a default `TestCase` to each test.
// NOTE(review): presumably `TestCase` sets up per-case snapshot naming — confirm in
// `crate::util::test_util`.
#[fixture]
#[cfg_attr(coverage_nightly, coverage(off))]
fn case() -> TestCase {
Default::default()
}

// Snapshots the names of the health checks returned by `default_health_checks` for both
// values of the app-wide `default_enable` setting.
// NOTE(review): the name `default_middleware` looks like a copy-paste leftover, since this
// test exercises health checks. Renaming it would likely change the insta snapshot file
// names, so the snapshots would need to be updated together with the rename.
#[rstest]
#[case(false)]
#[case(true)]
#[cfg_attr(coverage_nightly, coverage(off))]
fn default_middleware(_case: TestCase, #[case] default_enable: bool) {
// Arrange
let mut config = AppConfig::test(None).unwrap();
config.health_check.default_enable = default_enable;

let context = AppContext::<()>::test(Some(config), None, None).unwrap();

// Act
let health_checks = super::default_health_checks::<MockApp>(&context);
let health_checks = health_checks.keys().collect_vec();

// Assert
assert_toml_snapshot!(health_checks);
}
}
Loading

0 comments on commit f885976

Please sign in to comment.