Skip to content

Commit

Permalink
WIP: stringing internal api stuff together on both ends
Browse files Browse the repository at this point in the history
  • Loading branch information
lif committed Dec 21, 2023
1 parent c90b0a7 commit 3ff1329
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 27 deletions.
43 changes: 27 additions & 16 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -976,31 +976,42 @@ impl super::Nexus {
&self,
opctx: &OpContext,
instance_id: &Uuid,
result: Result<
Option<nexus::SledInstanceState>,
sled_agent_client::types::Error,
>,
result: Result<nexus::SledInstanceState, Error>,
) -> Result<(), Error> {
let (.., authz_instance, db_instance) =
LookupPath::new(&opctx, &self.db_datastore)
.instance_id(*instance_id)
.lookup_for(authz::Action::Modify)
.await?;
let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore)
.instance_id(*instance_id)
.lookup_for(authz::Action::Modify)
.await?;

let state = self
.db_datastore
.instance_fetch_with_vmm(opctx, &authz_instance)
.await?;

// TODO: add param for sled-agent to show its 'previous' and compare with this
// to validate consistency between nexus and sled-agent
let prev_instance_runtime = &state.instance().runtime_state;

self.handle_instance_put_result(
instance_id,
prev_instance_runtime,
result.map_err(Into::into), // TODO: this isn't real
).await?;
todo!()
match result {
Ok(new_state) => self
.db_datastore
.instance_and_vmm_update_runtime(
instance_id,
&new_state.instance_state.into(),
&new_state.propolis_id,
&new_state.vmm_state.into(),
)
.await
.map(|_| ()),
Err(error) => {
self.mark_instance_failed(
instance_id,
prev_instance_runtime,
error,
)
.await
}
}
}

/// Modifies the runtime state of the Instance as requested. This generally
Expand Down Expand Up @@ -1296,7 +1307,7 @@ impl super::Nexus {
&self,
instance_id: &Uuid,
prev_instance_runtime: &db::model::InstanceRuntimeState,
reason: &SledAgentInstancePutError,
reason: impl std::fmt::Debug,
) -> Result<(), Error> {
error!(self.log, "marking instance failed due to sled agent API error";
"instance_id" => %instance_id,
Expand Down
44 changes: 35 additions & 9 deletions nexus/src/internal_api/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ pub(crate) fn internal_api() -> NexusApiDescription {
api.register(physical_disk_delete)?;
api.register(zpool_put)?;
api.register(cpapi_instances_put)?;
api.register(cpapi_handle_instance_put_success)?;
api.register(cpapi_handle_instance_put_failure)?;
api.register(cpapi_disks_put)?;
api.register(cpapi_volume_remove_read_only_parent)?;
api.register(cpapi_disk_remove_read_only_parent)?;
Expand Down Expand Up @@ -269,24 +271,48 @@ async fn cpapi_instances_put(
apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

/// Asynchronously report the result of a potentially long-running
/// Asynchronously report the successful result of a potentially long-running
/// instance_put call to sled-agent made during instance creation.
#[endpoint {
method = PUT,
path = "/instances/{instance_id}/creation-result",
method = PUT,
path = "/instances/{instance_id}/creation-success",
}]
async fn cpapi_handle_instance_put_success(
rqctx: RequestContext<Arc<ServerContext>>,
path_params: Path<InstancePathParam>,
instance_state: TypedBody<SledInstanceState>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let apictx = rqctx.context();
let nexus = &apictx.nexus;
let path = path_params.into_inner();
let result = Ok(instance_state.into_inner());
let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
let handler = async {
nexus
.instance_handle_creation_result(&opctx, &path.instance_id, result)
.await?;
Ok(HttpResponseUpdatedNoContent())
};
apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

/// Asynchronously report the unsuccessful result of a potentially long-running
/// instance_put call to sled-agent made during instance creation.
#[endpoint {
method = PUT,
path = "/instances/{instance_id}/creation-failure",
}]
async fn cpapi_handle_instance_put_result(
async fn cpapi_handle_instance_put_failure(
rqctx: RequestContext<Arc<ServerContext>>,
path_params: Path<InstancePathParam>,
result: TypedBody<Result<
Option<SledInstanceState>,
sled_agent_client::types::Error
>>,
error: TypedBody<String>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let apictx = rqctx.context();
let nexus = &apictx.nexus;
let path = path_params.into_inner();
let result = result.into_inner();
let result = Err(omicron_common::api::external::Error::InternalError {
internal_message: error.into_inner(),
});
let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
let handler = async {
nexus
Expand Down
79 changes: 79 additions & 0 deletions openapi/nexus-internal.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,85 @@
}
}
},
"/instances/{instance_id}/creation-failure": {
"put": {
"summary": "Asynchronously report the unsuccessful result of a potentially long-running",
"description": "instance_put call to sled-agent made during instance creation.",
"operationId": "cpapi_handle_instance_put_failure",
"parameters": [
{
"in": "path",
"name": "instance_id",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"title": "String",
"type": "string"
}
}
},
"required": true
},
"responses": {
"204": {
"description": "resource updated"
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
}
},
"/instances/{instance_id}/creation-success": {
"put": {
"summary": "Asynchronously report the successful result of a potentially long-running",
"description": "instance_put call to sled-agent made during instance creation.",
"operationId": "cpapi_handle_instance_put_success",
"parameters": [
{
"in": "path",
"name": "instance_id",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SledInstanceState"
}
}
},
"required": true
},
"responses": {
"204": {
"description": "resource updated"
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
}
},
"/metrics/collect/{producer_id}": {
"get": {
"summary": "Endpoint for oximeter to collect nexus server metrics.",
Expand Down
56 changes: 54 additions & 2 deletions sled-agent/src/instance_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,60 @@ impl InstanceManager {
}
};

let new_state = instance.put_state(target).await?;
Ok(InstancePutStateResponse { updated_runtime: Some(new_state) })
match target {
// these may involve a long-running zone creation, so avoid HTTP
// request timeouts by decoupling the response
InstanceStateRequested::MigrationTarget(_)
| InstanceStateRequested::Running => {
let inst_mgr = self.inner.clone();
let log =
self.inner.log.new(o!("component" => "InstanceManager"));
tokio::spawn(async move {
let nexus_client = inst_mgr.nexus_client.client();
match instance.put_state(target).await {
Ok(state) => {
let instance_state: nexus_client::types::SledInstanceState = state.into();
if let Err(err) = nexus_client
.cpapi_handle_instance_put_success(
&instance_id,
&instance_state,
)
.await
{
error!(log, "Failed to inform Nexus of instance_put success";
"err" => %err,
"instance_state" => ?instance_state,
"instance_id" => %instance_id,
);
}
}
Err(instance_put_error) => {
if let Err(err) = nexus_client
.cpapi_handle_instance_put_failure(
&instance_id,
&instance_put_error.to_string(),
)
.await
{
error!(log, "Failed to inform Nexus of instance_put failure";
"err" => %err,
"instance_id" => %instance_id,
"instance_put_error" => %instance_put_error,
);
}
}
};
});
Ok(InstancePutStateResponse { updated_runtime: None })
}
InstanceStateRequested::Stopped
| InstanceStateRequested::Reboot => {
let new_state = instance.put_state(target).await?;
Ok(InstancePutStateResponse {
updated_runtime: Some(new_state),
})
}
}
}

/// Idempotently attempts to set the instance's migration IDs to the
Expand Down

0 comments on commit 3ff1329

Please sign in to comment.