Skip to content

Commit

Permalink
Add ML Profile APIs (opensearch-project#787)
Browse files Browse the repository at this point in the history
* Added ML get profile APIs.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Modified response payload state from RUNNING to COMPLETED, updated PR link in CHANGELOG.md.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Rebase, changed native memory threshold settings to JVM heap memory threshold, fixed vale errors.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Renamed files to follow the naming convention.

Signed-off-by: Nathalie Jonathan <[email protected]>

---------

Signed-off-by: Nathalie Jonathan <[email protected]>
  • Loading branch information
nathaliellenaa authored Jan 14, 2025
1 parent 5e22b8b commit 806b25f
Show file tree
Hide file tree
Showing 6 changed files with 455 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Added support for evaluating response payloads in prologues and epilogues ([#772](https://github.com/opensearch-project/opensearch-api-specification/pull/772))
- Added `GET /_plugins/_ml/models/{model_id}`, `POST /_plugins/_ml/models/_search`, `POST /_plugins/_ml/models/_unload`, `_undeploy`, `_upload`, `meta`, `_register_meta`, `POST /_plugins/_ml/models/{model_id}/_load`, `_predict`, `_unload`, `chunk/{chunk_number}`, `upload_chunk/{chunk_number}`, and `PUT /_plugins/_ml/models/{model_id}` ([#733](https://github.com/opensearch-project/opensearch-api-specification/pull/733))
- Added `GET`, `POST`, `PUT`, `DELETE /_plugins/_ml/controllers/{model_id}` ([#779](https://github.com/opensearch-project/opensearch-api-specification/pull/779))
- Added `GET /_plugins/_ml/profile`, `GET /_plugins/_ml/profile/models`, `models/{model_id}`, `tasks`, `tasks/{task_id}` ([#787](https://github.com/opensearch-project/opensearch-api-specification/pull/787))

### Removed
- Removed unsupported `_common.mapping:SourceField`'s `mode` field and associated `_common.mapping:SourceFieldMode` enum ([#652](https://github.com/opensearch-project/opensearch-api-specification/pull/652))
Expand Down
101 changes: 101 additions & 0 deletions spec/namespaces/ml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,65 @@ paths:
responses:
'200':
$ref: '#/components/responses/ml.delete_controller@200'
# Profile endpoint with no path parameters. The request body and the 200
# response are both resolved from this document's `components` section.
/_plugins/_ml/profile:
  get:
    description: Get a profile.
    operationId: ml.get_profile.0
    x-operation-group: ml.get_profile
    x-version-added: '2.4'
    requestBody:
      $ref: '#/components/requestBodies/ml.get_profile'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_profile@200'
# Both operations below belong to the `ml.get_profile_models` group: they share
# the same description, request body, and 200 response. The second variant adds
# a required `model_id` path parameter and is marked as added in a later version.
/_plugins/_ml/profile/models:
  get:
    operationId: ml.get_profile_models.0
    x-operation-group: ml.get_profile_models
    x-version-added: '2.4'
    # Keep this text identical to the `{model_id}` variant in the same group.
    description: Get runtime information about ML models.
    requestBody:
      $ref: '#/components/requestBodies/ml.get_profile_models'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_profile_models@200'
/_plugins/_ml/profile/models/{model_id}:
  get:
    operationId: ml.get_profile_models.1
    x-operation-group: ml.get_profile_models
    x-version-added: '2.11'
    description: Get runtime information about ML models.
    parameters:
      - $ref: '#/components/parameters/ml.get_profile_models::path.model_id'
    requestBody:
      $ref: '#/components/requestBodies/ml.get_profile_models'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_profile_models@200'
# Both operations below belong to the `ml.get_profile_tasks` group: they share
# the same description, request body, and 200 response. The second variant adds
# a required `task_id` path parameter and is marked as added in a later version.
/_plugins/_ml/profile/tasks:
  get:
    operationId: ml.get_profile_tasks.0
    x-operation-group: ml.get_profile_tasks
    x-version-added: '2.4'
    # Keep this text identical to the `{task_id}` variant in the same group.
    description: Get runtime information about ML tasks.
    requestBody:
      $ref: '#/components/requestBodies/ml.get_profile_tasks'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_profile_tasks@200'
/_plugins/_ml/profile/tasks/{task_id}:
  get:
    operationId: ml.get_profile_tasks.1
    x-operation-group: ml.get_profile_tasks
    x-version-added: '2.11'
    description: Get runtime information about ML tasks.
    parameters:
      - $ref: '#/components/parameters/ml.get_profile_tasks::path.task_id'
    requestBody:
      $ref: '#/components/requestBodies/ml.get_profile_tasks'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_profile_tasks@200'
components:
requestBodies:
ml.register_model_group:
Expand Down Expand Up @@ -1234,6 +1293,21 @@ components:
$ref: '../schemas/ml._common.yaml#/components/schemas/UserRateLimiter'
model_id:
$ref: '../schemas/_common.yaml#/components/schemas/Name'
# Request body for `ml.get_profile`: JSON validated against `ProfileRequest`.
ml.get_profile:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/ProfileRequest'
# Request body for `ml.get_profile_models`: JSON validated against `ProfileRequest`.
ml.get_profile_models:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/ProfileRequest'
# Request body for `ml.get_profile_tasks`: JSON validated against `ProfileRequest`.
ml.get_profile_tasks:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/ProfileRequest'
responses:
ml.register_model_group@200:
content:
Expand Down Expand Up @@ -1571,6 +1645,21 @@ components:
application/json:
schema:
$ref: '../schemas/_common.yaml#/components/schemas/WriteResponseBase'
# 200 response for `ml.get_profile`: JSON validated against `GetProfileResponse`.
ml.get_profile@200:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/GetProfileResponse'
# 200 response for `ml.get_profile_models`: JSON validated against `GetProfileResponse`.
ml.get_profile_models@200:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/GetProfileResponse'
# 200 response for `ml.get_profile_tasks`: JSON validated against `GetProfileResponse`.
ml.get_profile_tasks@200:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/GetProfileResponse'
parameters:
ml.get_model_group::path.model_group_id:
name: model_group_id
Expand Down Expand Up @@ -1800,5 +1889,17 @@ components:
name: model_id
in: path
required: true
schema:
type: string
# Required string path parameter filling `{model_id}` in the per-model profile path.
ml.get_profile_models::path.model_id:
  in: path
  name: model_id
  required: true
  schema:
    type: string
# Required string path parameter filling `{task_id}` in the per-task profile path.
ml.get_profile_tasks::path.task_id:
  in: path
  name: task_id
  required: true
  schema:
    type: string
113 changes: 112 additions & 1 deletion spec/schemas/ml._common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1256,4 +1256,115 @@ components:
- MILLISECONDS
- MINUTES
- NANOSECONDS
- SECONDS
- SECONDS
# Body accepted by the profile operations. No `required` list is declared, so
# every property may be omitted.
ProfileRequest:
  type: object
  properties:
    # Three ID filters, each an array of the shared `Id` schema.
    node_ids:
      type: array
      items:
        $ref: '_common.yaml#/components/schemas/Id'
    model_ids:
      type: array
      items:
        $ref: '_common.yaml#/components/schemas/Id'
    task_ids:
      type: array
      items:
        $ref: '_common.yaml#/components/schemas/Id'
    # Boolean switches controlling how much is returned.
    return_all_tasks:
      description: Whether to return all tasks.
      type: boolean
    return_all_models:
      description: Whether to return all models.
      type: boolean
# Top-level profile response: a single `nodes` property holding the `Nodes` map.
GetProfileResponse:
  type: object
  properties:
    nodes:
      $ref: '#/components/schemas/Nodes'
# Free-form map whose values are `Node` objects.
# Keys are presumably node IDs; confirm against a real response.
Nodes:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/Node'
# Profile data for one entry of `Nodes`: its model map and its task map.
Node:
  type: object
  properties:
    models:
      $ref: '#/components/schemas/Models'
    tasks:
      $ref: '#/components/schemas/Tasks'
# Free-form map whose values are `ModelProfile` objects.
# Keys are presumably model IDs; confirm against a real response.
Models:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/ModelProfile'
# Runtime profile of a single model, used as the value type of the `Models` map.
ModelProfile:
  type: object
  properties:
    model_state:
      description: The model state.
      type: string
      enum:
        - DEPLOYED
        - DEPLOYING
        - DEPLOY_FAILED
        - PARTIALLY_DEPLOYED
        - REGISTERED
        - REGISTERING
        - UNDEPLOYED
    predictor:
      description: The predictor.
      type: string
    # NOTE(review): the array items reference the plural `NodeIds` schema. If
    # `NodeIds` already describes a list, each item may be looser than intended;
    # confirm against `_common.yaml`. The same applies to `target_worker_nodes`.
    worker_nodes:
      type: array
      items:
        $ref: '_common.yaml#/components/schemas/NodeIds'
    predict_request_stats:
      $ref: '#/components/schemas/PredictRequestStats'
    target_worker_nodes:
      type: array
      items:
        $ref: '_common.yaml#/components/schemas/NodeIds'
    memory_size_estimation_cpu:
      description: The estimated memory size in CPU.
      type: integer
      format: int64
    memory_size_estimation_gpu:
      description: The estimated memory size in GPU.
      type: integer
      format: int64
# Predict-request statistics: one 64-bit request counter plus latency figures
# (all doubles, in milliseconds according to their descriptions).
PredictRequestStats:
  type: object
  properties:
    count:
      description: The total predict requests on this node.
      type: integer
      format: int64
    max:
      description: The maximum latency in milliseconds.
      type: number
      format: double
    min:
      description: The minimum latency in milliseconds.
      type: number
      format: double
    average:
      description: The average latency in milliseconds.
      type: number
      format: double
    p50:
      description: The 50th percentile latency in milliseconds.
      type: number
      format: double
    p90:
      description: The 90th percentile latency in milliseconds.
      type: number
      format: double
    p99:
      description: The 99th percentile latency in milliseconds.
      type: number
      format: double
# Free-form map whose values are `Task` objects; `Task` is referenced from this
# same document's components and is not part of this hunk.
# Keys are presumably task IDs; confirm against a real response.
Tasks:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/Task'
59 changes: 59 additions & 0 deletions tests/plugins/ml/ml/profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
$schema: ../../../../json_schemas/test_story.schema.yaml

description: Test the retrieval of runtime information about ML tasks and models.
version: '>= 2.7'
prologues:
  # Set the ML Commons JVM heap memory threshold setting to 100 before any model work.
  - method: PUT
    path: /_cluster/settings
    request:
      payload:
        persistent:
          plugins.ml_commons.jvm_heap_memory_threshold: 100
  # Register a model and capture the task ID returned in the response payload.
  - id: register_model
    method: POST
    path: /_plugins/_ml/models/_register
    request:
      payload:
        name: huggingface/sentence-transformers/msmarco-distilbert-base-tas-b
        version: '1.0.1'
        model_format: TORCH_SCRIPT
    output:
      task_id: payload.task_id
  # Fetch the registration task (retry: count 3, wait 10000) expecting state
  # COMPLETED, then export the model ID and the first worker node for later steps.
  - id: get_completed_task
    method: GET
    path: /_plugins/_ml/tasks/{task_id}
    parameters:
      task_id: ${register_model.task_id}
    retry:
      count: 3
      wait: 10000
    response:
      status: 200
      payload:
        state: COMPLETED
    output:
      model_id: payload.model_id
      node_id: payload.worker_node[0]
epilogues:
  # Cleanup: both 200 and 404 are accepted for each delete.
  - method: DELETE
    path: /_plugins/_ml/models/{model_id}
    parameters:
      model_id: ${get_completed_task.model_id}
    status: [200, 404]
  - method: DELETE
    path: /_plugins/_ml/tasks/{task_id}
    parameters:
      task_id: ${register_model.task_id}
    status: [200, 404]
chapters:
  - synopsis: Get runtime information of all tasks and models on a specific node.
    method: GET
    path: /_plugins/_ml/profile
    request:
      payload:
        node_ids:
          - ${get_completed_task.node_id}
        return_all_tasks: true
        return_all_models: true
    response:
      status: 200
Loading

0 comments on commit 806b25f

Please sign in to comment.