Skip to content

Commit

Permalink
Make drones terminate backends on unhandled errors, instead of losing…
Browse files Browse the repository at this point in the history
… track of them. (#461)

* in case of errors fail backend instead of letting it continue
* add some errors to tracing to help with noticing db issues etc
  • Loading branch information
pretentious7 authored Oct 18, 2023
1 parent afe6b4a commit 1346aa4
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 12 deletions.
17 changes: 12 additions & 5 deletions drone/src/agent/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,18 @@ impl<E: Engine> Executor<E> {
}
self.update_backend_state(spawn_request, state).await;
}
_ => tracing::error!(
?error,
?state,
"Error unhandled (no change in backend state)"
),
_ => {
tracing::error!(
?error,
?state,
"Error unhandled (putting backend into failed state"
);

// leads to .step() running with failed.
state = BackendState::Failed;
self.update_backend_state(spawn_request, state).await;
continue;
}
}
break;
}
Expand Down
40 changes: 33 additions & 7 deletions drone/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,25 @@ pub struct ProxyRoute {
pub bearer_token: Option<String>,
}

const SQLITE_DB_LOCKED_ERR_CODE: u64 = 5;

// logs in cases where issues have been attributed to resource
// starvation on drone
fn augment_db_error(err: sqlx::Error) -> sqlx::Error {
match err {
sqlx::Error::Database(ref e)
if e.code().unwrap() == SQLITE_DB_LOCKED_ERR_CODE.to_string() =>
{
tracing::error!("Database is Locked, may indicate that drone process needs more CPU!");
}
sqlx::Error::PoolTimedOut => {
tracing::error!("SQLite Connection Pool timed out, indicates that the drone process needs more CPU!");
}
_ => {}
};
err
}

#[allow(unused)]
impl DroneDatabase {
pub async fn new(db_path: &Path) -> Result<DroneDatabase> {
Expand Down Expand Up @@ -63,7 +82,8 @@ impl DroneDatabase {
spec,
)
.execute(&self.pool)
.await?;
.await
.map_err(augment_db_error)?;

Ok(())
}
Expand All @@ -76,7 +96,8 @@ impl DroneDatabase {
"
)
.fetch_one(&self.pool)
.await?;
.await
.map_err(augment_db_error)?;
Ok(result.c)
}

Expand All @@ -88,7 +109,8 @@ impl DroneDatabase {
"
)
.fetch_all(&self.pool)
.await?
.await
.map_err(augment_db_error)?
.iter()
.map(|d| {
Ok(Backend {
Expand Down Expand Up @@ -118,7 +140,8 @@ impl DroneDatabase {
backend_id,
)
.execute(&self.pool)
.await?;
.await
.map_err(augment_db_error)?;

Ok(())
}
Expand Down Expand Up @@ -162,7 +185,8 @@ impl DroneDatabase {
bearer_token,
)
.execute(&self.pool)
.await?;
.await
.map_err(augment_db_error)?;

Ok(())
}
Expand All @@ -178,7 +202,8 @@ impl DroneDatabase {
subdomain
)
.execute(&self.pool)
.await?;
.await
.map_err(augment_db_error)?;
}

Ok(())
Expand All @@ -196,7 +221,8 @@ impl DroneDatabase {
backend_id
)
.fetch_one(&self.pool)
.await?
.await
.map_err(augment_db_error)?
.last_active;

Ok(Utc.timestamp_opt(time, 0).single().expect(
Expand Down

0 comments on commit 1346aa4

Please sign in to comment.