Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into john/rss-handoff-sets…
Browse files Browse the repository at this point in the history
…-initial-blueprint
  • Loading branch information
jgallagher committed Mar 15, 2024
2 parents 0b28298 + e825a08 commit 6bafd89
Show file tree
Hide file tree
Showing 50 changed files with 2,569 additions and 8,907 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions clients/nexus-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ serde_json.workspace = true
slog.workspace = true
uuid.workspace = true
omicron-workspace-hack.workspace = true
omicron-uuid-kinds.workspace = true
4 changes: 4 additions & 0 deletions clients/nexus-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ progenitor::generate_api!(
NewPasswordHash = omicron_passwords::NewPasswordHash,
NetworkInterface = omicron_common::api::internal::shared::NetworkInterface,
NetworkInterfaceKind = omicron_common::api::internal::shared::NetworkInterfaceKind,
TypedUuidForDownstairsKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::DownstairsKind>,
TypedUuidForUpstairsKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::UpstairsKind>,
TypedUuidForUpstairsRepairKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::UpstairsRepairKind>,
TypedUuidForUpstairsSessionKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::UpstairsSessionKind>,
},
patch = {
SledAgentInfo = { derives = [PartialEq, Eq] },
Expand Down
1 change: 1 addition & 0 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tokio = { workspace = true, features = ["full"] }
uuid.workspace = true
parse-display.workspace = true
progenitor.workspace = true
progenitor-client.workspace = true
omicron-workspace-hack.workspace = true
once_cell.workspace = true
regress.workspace = true
Expand Down
83 changes: 83 additions & 0 deletions common/src/api/internal/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ use crate::api::external::{
InstanceState, IpNet, SemverVersion, Vni,
};
use chrono::{DateTime, Utc};
use omicron_uuid_kinds::DownstairsRegionKind;
use omicron_uuid_kinds::TypedUuid;
use omicron_uuid_kinds::UpstairsRepairKind;
use omicron_uuid_kinds::UpstairsSessionKind;
use parse_display::{Display, FromStr};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -251,3 +255,82 @@ pub enum HostIdentifier {
Ip(IpNet),
Vpc(Vni),
}

// Discriminates the kind of repair being reported. It is carried as the
// `repair_type` field of both `RepairStartInfo` and `RepairFinishInfo`.
//
// `rename_all = "snake_case"` makes the variants serialize as `live` and
// `reconciliation`.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum UpstairsRepairType {
Live,
Reconciliation,
}

// One entry of the `repairs` list in `RepairStartInfo` / `RepairFinishInfo`:
// a region UUID paired with an IPv6 socket address.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct DownstairsUnderRepair {
// Typed UUID (kind `DownstairsRegionKind`) identifying the region.
pub region_uuid: TypedUuid<DownstairsRegionKind>,
// IPv6-only socket address (address + port).
// NOTE(review): presumably the address of the downstairs being repaired --
// confirm against the sender.
pub target_addr: std::net::SocketAddrV6,
}

// Serializable record describing the start of a repair (judging by the
// name; the sender is not visible in this file). It has the same fields as
// `RepairFinishInfo` minus `aborted`.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct RepairStartInfo {
// UTC timestamp attached to this record.
pub time: DateTime<Utc>,
// Typed UUID of kind `UpstairsSessionKind`.
pub session_id: TypedUuid<UpstairsSessionKind>,
// Typed UUID of kind `UpstairsRepairKind`.
pub repair_id: TypedUuid<UpstairsRepairKind>,
// Whether this is a `Live` or a `Reconciliation` repair.
pub repair_type: UpstairsRepairType,
// The (region, address) pairs covered by this repair.
pub repairs: Vec<DownstairsUnderRepair>,
}

// Serializable record describing the end of a repair (judging by the name;
// the sender is not visible in this file). It has the same fields as
// `RepairStartInfo` plus the `aborted` flag.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct RepairFinishInfo {
// UTC timestamp attached to this record.
pub time: DateTime<Utc>,
// Typed UUID of kind `UpstairsSessionKind`.
pub session_id: TypedUuid<UpstairsSessionKind>,
// Typed UUID of kind `UpstairsRepairKind`.
pub repair_id: TypedUuid<UpstairsRepairKind>,
// Whether this was a `Live` or a `Reconciliation` repair.
pub repair_type: UpstairsRepairType,
// The (region, address) pairs covered by this repair.
pub repairs: Vec<DownstairsUnderRepair>,
// NOTE(review): presumably true when the repair stopped before completing --
// confirm against the sender.
pub aborted: bool,
}

// Serializable progress record: a timestamp plus a "current of total" pair
// of counters.
//
// Both counters are signed 64-bit integers and nothing in this type
// enforces `0 <= current_item <= total_items`; consumers must validate the
// values themselves if they rely on that.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct RepairProgress {
// UTC timestamp attached to this record.
pub time: DateTime<Utc>,
pub current_item: i64,
pub total_items: i64,
}

// Reason carried by a `DownstairsClientStopRequest`.
//
// `rename_all = "snake_case"` determines the serialized variant names, e.g.
// `FailedReconcile` -> `failed_reconcile`.
//
// NOTE(review): serde's snake_case rule inserts an underscore before every
// uppercase letter after the first, so `IOError` serializes as `i_o_error`,
// not `io_error`. Renaming the variant (e.g. to `IoError`) would change the
// wire format and the generated schema, so coordinate with all consumers
// before touching it.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DownstairsClientStopRequestReason {
Replacing,
Disabled,
FailedReconcile,
IOError,
BadNegotiationOrder,
Incompatible,
FailedLiveRepair,
TooManyOutstandingJobs,
Deactivated,
}

// Serializable record pairing a UTC timestamp with the reason a downstairs
// client stop was requested.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct DownstairsClientStopRequest {
// UTC timestamp attached to this record.
pub time: DateTime<Utc>,
// Why the stop was requested.
pub reason: DownstairsClientStopRequestReason,
}

// Reason carried by a `DownstairsClientStopped` record.
//
// `rename_all = "snake_case"` determines the serialized variant names, e.g.
// `ConnectionTimeout` -> `connection_timeout` and
// `ReceiveTaskCancelled` -> `receive_task_cancelled`.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DownstairsClientStoppedReason {
ConnectionTimeout,
ConnectionFailed,
Timeout,
WriteFailed,
ReadFailed,
RequestedStop,
Finished,
QueueClosed,
ReceiveTaskCancelled,
}

// Serializable record pairing a UTC timestamp with the reason a downstairs
// client stopped.
//
// NOTE: plain `//` comments are deliberate here; `///` doc comments on a
// `JsonSchema` type are copied into the generated schema as descriptions.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)]
pub struct DownstairsClientStopped {
// UTC timestamp attached to this record.
pub time: DateTime<Utc>,
// Why the client stopped.
pub reason: DownstairsClientStoppedReason,
}
81 changes: 81 additions & 0 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,84 @@ impl slog::KV for FileKv {
}

pub const OMICRON_DPD_TAG: &str = "omicron";

use futures::Future;
use slog::warn;

/// Retry a progenitor client operation until a known result is returned.
///
/// Saga execution relies on the outcome of an external call being known: since
/// they are idempotent, reissue the external call until a known result comes
/// back. Retry if a communication error is seen, or if another retryable error
/// is seen.
///
/// Note that retrying is only valid if the call itself is idempotent.
pub async fn retry_until_known_result<F, T, E, Fut>(
log: &slog::Logger,
mut f: F,
) -> Result<T, progenitor_client::Error<E>>
where
F: FnMut() -> Fut,
Fut: Future<Output = Result<T, progenitor_client::Error<E>>>,
E: std::fmt::Debug,
{
backoff::retry_notify(
backoff::retry_policy_internal_service(),
move || {
let fut = f();
async move {
match fut.await {
Err(progenitor_client::Error::CommunicationError(e)) => {
warn!(
log,
"saw transient communication error {}, retrying...",
e,
);

Err(backoff::BackoffError::transient(
progenitor_client::Error::CommunicationError(e),
))
}

Err(progenitor_client::Error::ErrorResponse(
response_value,
)) => {
match response_value.status() {
// Retry on 503 or 429
http::StatusCode::SERVICE_UNAVAILABLE
| http::StatusCode::TOO_MANY_REQUESTS => {
Err(backoff::BackoffError::transient(
progenitor_client::Error::ErrorResponse(
response_value,
),
))
}

// Anything else is a permanent error
_ => Err(backoff::BackoffError::Permanent(
progenitor_client::Error::ErrorResponse(
response_value,
),
)),
}
}

Err(e) => {
warn!(log, "saw permanent error {}, aborting", e,);

Err(backoff::BackoffError::Permanent(e))
}

Ok(v) => Ok(v),
}
}
},
|error: progenitor_client::Error<_>, delay| {
warn!(
log,
"failed external call ({:?}), will retry in {:?}", error, delay,
);
},
)
.await
}
Loading

0 comments on commit 6bafd89

Please sign in to comment.