From 722b6e76543d349fe2d82742308f0b78a4436cde Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Thu, 17 Oct 2024 23:48:38 +0000 Subject: [PATCH 1/7] Doc updates post compartmentalization Signed-off-by: Anuradha Karuppiah --- docs/CMakeLists.txt | 4 ++-- docs/source/getting_started.md | 4 ++++ docs/source/py_api.rst | 1 + docs/source/stages/morpheus_stages.md | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 41aa33a535..5e218fc989 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -30,7 +30,7 @@ add_custom_target(${PROJECT_NAME}_docs BUILD_DIR=${CMAKE_CURRENT_BINARY_DIR} ${SPHINX_EXECUTABLE} ${SPHINX_HTML_ARGS} ${SPHINX_SOURCE} ${SPHINX_BUILD} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Generating documentation with Sphinx" - DEPENDS morpheus-package-outputs morpheus_llm-package-outputs + DEPENDS morpheus-package-outputs morpheus_llm-package-outputs morpheus_dfp-package-outputs ) add_custom_target(${PROJECT_NAME}_docs_linkcheck @@ -38,7 +38,7 @@ add_custom_target(${PROJECT_NAME}_docs_linkcheck BUILD_DIR=${CMAKE_CURRENT_BINARY_DIR} ${SPHINX_EXECUTABLE} ${SPHINX_LINKCHECK_ARGS} ${SPHINX_SOURCE} ${SPHINX_LINKCHECK_OUT} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Checking documentation links with Sphinx" - DEPENDS morpheus-package-outputs morpheus_llm-package-outputs + DEPENDS morpheus-package-outputs morpheus_llm-package-outputs morpheus_dfp-package-outputs ) list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md index b4d2b04cab..9478f7d426 100644 --- a/docs/source/getting_started.md +++ b/docs/source/getting_started.md @@ -19,6 +19,7 @@ limitations under the License. 
There are three ways to get started with Morpheus: - [Using pre-built Docker containers](#using-pre-built-docker-containers) +- [Using the morpheus conda packages](#using-morpheus-conda-packages) - [Building the Morpheus Docker container](#building-the-morpheus-container) - [Building Morpheus from source](./developer_guide/contributing.md#building-from-source) @@ -78,6 +79,9 @@ Once launched, users wishing to launch Triton using the included Morpheus models Skip ahead to the [Acquiring the Morpheus Models Container](#acquiring-the-morpheus-models-container) section. +## Using Morpheus Conda Packages +TBD + ## Building the Morpheus Container ### Clone the Repository diff --git a/docs/source/py_api.rst b/docs/source/py_api.rst index e4aa991db2..e37d340216 100644 --- a/docs/source/py_api.rst +++ b/docs/source/py_api.rst @@ -22,4 +22,5 @@ Python API :recursive: morpheus + morpheus_dfp morpheus_llm diff --git a/docs/source/stages/morpheus_stages.md b/docs/source/stages/morpheus_stages.md index db2d533606..e860beff38 100644 --- a/docs/source/stages/morpheus_stages.md +++ b/docs/source/stages/morpheus_stages.md @@ -66,7 +66,7 @@ Stages are the building blocks of Morpheus pipelines. Below is a list of the mos ## LLM -- LLM Engine Stage {py:class}`~morpheus.stages.llm.llm_engine_stage.LLMEngineStage` Execute an LLM engine within a Morpheus pipeline. +- LLM Engine Stage {py:class}`~morpheus_llm.stages.llm.llm_engine_stage.LLMEngineStage` Execute an LLM engine within a Morpheus pipeline. ## Output - HTTP Client Sink Stage {py:class}`~morpheus.stages.output.http_client_sink_stage.HttpClientSinkStage` Write all messages to an HTTP endpoint. 
From f796db16a1525ea9acf04088748314b37e6336fc Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 21 Oct 2024 18:26:09 +0000 Subject: [PATCH 2/7] Adjust DFP docstring indentations to fix sphinx check failures Signed-off-by: Anuradha Karuppiah --- .../morpheus_dfp/modules/dfp_deployment.py | 50 ++++++++--------- .../morpheus_dfp/modules/dfp_inference.py | 4 +- .../modules/dfp_inference_pipe.py | 48 ++++++++--------- .../modules/dfp_rolling_window.py | 6 +-- .../morpheus_dfp/modules/dfp_split_users.py | 6 +-- .../morpheus_dfp/modules/dfp_training.py | 6 +-- .../morpheus_dfp/modules/dfp_training_pipe.py | 54 +++++++++---------- 7 files changed, 87 insertions(+), 87 deletions(-) diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py index 05611fbca0..21a0cfb96d 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py @@ -50,8 +50,8 @@ def dfp_deployment(builder: mrc.Builder): - mlflow_writer_options (dict): Options for the MLflow model writer; Example: See Below - preprocessing_options (dict): Options for preprocessing the data; Example: See Below - stream_aggregation_options (dict): Options for aggregating the data by stream; Example: See Below - - timestamp_column_name (str): Name of the timestamp column used in the data; Example: "my_timestamp"; Default: - "timestamp" + - timestamp_column_name (str): Name of the timestamp column used in the data; Example: "my_timestamp"; + Default: "timestamp" - user_splitting_options (dict): Options for splitting the data by user; Example: See Below Inference Options Parameters: @@ -61,10 +61,10 @@ def dfp_deployment(builder: mrc.Builder): - fallback_username (str): User ID to use if user ID not found; Example: "generic_user"; Default: "generic_user" - inference_options (dict): Options for the inference module; Example: See Below - model_name_formatter (str): Format 
string for the model name; Example: "model_{timestamp}"; - Default: `[Required]` + Default: `[Required]` - num_output_ports (int): Number of output ports for the module; Example: 3 - timestamp_column_name (str): Name of the timestamp column in the input data; Example: "timestamp"; - Default: "timestamp" + Default: "timestamp" - stream_aggregation_options (dict): Options for aggregating the data by stream; Example: See Below - user_splitting_options (dict): Options for splitting the data by user; Example: See Below - write_to_file_options (dict): Options for writing the detections to a file; Example: See Below @@ -72,7 +72,7 @@ def dfp_deployment(builder: mrc.Builder): batching_options: - end_time (datetime/str): Endtime of the time window; Example: "2023-03-14T23:59:59"; Default: None - iso_date_regex_pattern (str): Regex pattern for ISO date matching; - Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: + Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: - parser_kwargs (dict): Additional arguments for the parser; Example: {}; Default: {} - period (str): Time period for grouping files; Example: "1d"; Default: "1d" - sampling_rate_s (int):: Sampling rate in seconds; Example: 0; Default: None @@ -82,18 +82,18 @@ def dfp_deployment(builder: mrc.Builder): - feature_columns (list): List of feature columns to train on; Example: ["column1", "column2", "column3"] - epochs (int): Number of epochs to train for; Example: 50 - model_kwargs (dict): Keyword arguments to pass to the model; Example: {"encoder_layers": [64, 32], - "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, - "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, - "progress_bar": false, "device": "cpu"} + "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, + "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, + 
"progress_bar": false, "device": "cpu"} - validation_size (float): Size of the validation set; Example: 0.1 mlflow_writer_options: - conda_env (str): Conda environment for the model; Example: `path/to/conda_env.yml`; Default: `[Required]` - databricks_permissions (dict): Permissions for the model; Example: See Below; Default: None - experiment_name_formatter (str): Formatter for the experiment name; Example: `experiment_name_{timestamp}`; - Default: `[Required]` + Default: `[Required]` - model_name_formatter (str): Formatter for the model name; Example: `model_name_{timestamp}`; - Default: `[Required]` + Default: `[Required]` - timestamp_column_name (str): Name of the timestamp column; Example: `timestamp`; Default: timestamp stream_aggregation_options: @@ -101,24 +101,24 @@ def dfp_deployment(builder: mrc.Builder): met. Otherwise, continue to aggregate user's history.; Example: 'batch'; Default: 'batch' - trigger_on_min_history (int): Minimum history to trigger a new training event; Example: 1; Default: 1 - trigger_on_min_increment (int): Minmum increment from the last trained to new training event; - Example: 0; Default: 0 + Example: 0; Default: 0 - timestamp_column_name (str): Name of the column containing timestamps; Example: 'timestamp'; - Default: 'timestamp' + Default: 'timestamp' - aggregation_span (str): Lookback timespan for training data in a new training event; Example: '60d'; - Default: '60d' + Default: '60d' - cache_to_disk (bool): Whether to cache streaming data to disk; Example: false; Default: false - cache_dir (str): Directory to use for caching streaming data; Example: './.cache'; Default: './.cache' user_splitting_options: - fallback_username (str): The user ID to use if the user ID is not found; Example: "generic_user"; - Default: 'generic_user' + Default: 'generic_user' - include_generic (bool): Whether to include a generic user ID in the output; Example: false; Default: False - include_individual (bool): Whether to include individual user IDs 
in the output; Example: true; Default: False - only_users (list): List of user IDs to include; others will be excluded; Example: ["user1", "user2", "user3"]; - Default: [] + Default: [] - skip_users (list): List of user IDs to exclude from the output; Example: ["user4", "user5"]; Default: [] - timestamp_column_name (str): Name of the column containing timestamps; Example: "timestamp"; - Default: 'timestamp' + Default: 'timestamp' - userid_column_name (str): Name of the column containing user IDs; Example: "username"; Default: 'username' detection_criteria: @@ -127,9 +127,9 @@ def dfp_deployment(builder: mrc.Builder): inference_options: - model_name_formatter (str): Formatter for model names; Example: "user_{username}_model"; - Default: `[Required]` + Default: `[Required]` - fallback_username (str): Fallback user to use if no model is found for a user; Example: "generic_user"; - Default: generic_user + Default: generic_user - timestamp_column_name (str): Name of the timestamp column; Example: "timestamp"; Default: timestamp write_to_file_options: @@ -141,19 +141,19 @@ def dfp_deployment(builder: mrc.Builder): monitoring_options: - description (str): Name to show for this Monitor Stage in the console window; Example: 'Progress'; - Default: 'Progress' + Default: 'Progress' - silence_monitors (bool): Slience the monitors on the console; Example: True; Default: False - smoothing (float): Smoothing parameter to determine how much the throughput should be averaged. - 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 + 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 - unit (str): Units to show in the rate value.; Example: 'messages'; Default: 'messages' - delayed_start (bool): When delayed_start is enabled, the progress bar will not be shown until the first - message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing - immediately. 
In large pipelines, this option may be desired to give a more accurate timing; - Example: True; Default: False + message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing + immediately. In large pipelines, this option may be desired to give a more accurate timing; + Example: True; Default: False - determine_count_fn_schema (str): Custom function for determining the count in a message. Gets called for - each message. Allows for correct counting of batched and sliced messages.; Example: func_str; Default: None + each message. Allows for correct counting of batched and sliced messages.; Example: func_str; Default: None - log_level (str): Enable this stage when the configured log level is at `log_level` or lower; - Example: 'DEBUG'; Default: INFO + Example: 'DEBUG'; Default: INFO """ # MODULE_INPUT_PORT diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py index c710d09f9f..b0a37fcbc4 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py @@ -46,9 +46,9 @@ def dfp_inference(builder: mrc.Builder): ---------- Configurable parameters: - model_name_formatter (str): Formatter for model names; Example: "user_{username}_model"; - Default: `[Required]` + Default: `[Required]` - fallback_username (str): Fallback user to use if no model is found for a user; Example: "generic_user"; - Default: generic_user + Default: generic_user - timestamp_column_name (str): Name of the timestamp column; Example: "timestamp"; Default: timestamp """ diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py index 26c36ca763..d18809cb2b 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py @@ -49,15 +49,15 @@ def 
dfp_inference_pipe(builder: mrc.Builder): Configurable parameters: - batching_options (dict): Options for batching the data; Example: See Below - cache_dir (str): Directory to cache the rolling window data; Example: "/path/to/cache/dir"; - Default: ./.cache + Default: ./.cache - detection_criteria (dict): Criteria for filtering detections; Example: See Below - fallback_username (str): User ID to use if user ID not found; Example: "generic_user"; - Default: "generic_user" + Default: "generic_user" - inference_options (dict): Options for the inference module; Example: See Below - model_name_formatter (str): Format string for the model name; Example: "model_{timestamp}"; - Default: `[Required]` + Default: `[Required]` - timestamp_column_name (str): Name of the timestamp column in the input data; Example: "timestamp"; - Default: "timestamp" + Default: "timestamp" - stream_aggregation_options (dict): Options for aggregating the data by stream; Example: See Below - user_splitting_options (dict): Options for splitting the data by user; Example: See Below - write_to_file_options (dict): Options for writing the detections to a file; Example: See Below @@ -66,12 +66,12 @@ def dfp_inference_pipe(builder: mrc.Builder): batching_options: - end_time (datetime/str): End time of the time window; Example: "2023-03-14T23:59:59"; Default: None - iso_date_regex_pattern (str): Regex pattern for ISO date matching; - Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: + Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: - parser_kwargs (dict): Additional arguments for the parser; Example: {}; Default: {} - period (str): Time period for grouping files; Example: "1d"; Default: "1d" - sampling_rate_s (int): Sampling rate in seconds; Example: 0; Default: None - start_time (datetime/str): Start time of the time window; Example: "2023-03-01T00:00:00"; - Default: None + Default: None detection_criteria: - copy (bool): Whether to copy the rows or slice them; Example: true; 
Default: true @@ -87,9 +87,9 @@ def dfp_inference_pipe(builder: mrc.Builder): inference_options: - model_name_formatter (str): Formatter for model names; Example: "user_{username}_model"; - Default: `[Required]` + Default: `[Required]` - fallback_username (str): Fallback user to use if no model is found for a user; Example: "generic_user"; - Default: generic_user + Default: generic_user - timestamp_column_name (str): Name of the timestamp column; Example: "timestamp"; Default: timestamp stream_aggregation_options: @@ -97,44 +97,44 @@ def dfp_inference_pipe(builder: mrc.Builder): are met. Otherwise, continue to aggregate user's history.; Example: 'batch'; Default: 'batch' - trigger_on_min_history (int): Minimum history to trigger a new training event; Example: 1; Default: 1 - trigger_on_min_increment (int): Minimum increment from the last trained to new training event; - Example: 0; Default: 0 + Example: 0; Default: 0 - timestamp_column_name (str): Name of the column containing timestamps; Example: 'timestamp'; - Default: 'timestamp' + Default: 'timestamp' - aggregation_span (str): Lookback timespan for training data in a new training event; Example: '60d'; - Default: '60d' + Default: '60d' - cache_to_disk (bool): Whether to cache streaming data to disk; Example: false; Default: false - cache_dir (str): Directory to use for caching streaming data; Example: './.cache'; Default: './.cache' user_splitting_options: - fallback_username (str): The user ID to use if the user ID is not found; Example: "generic_user"; - Default: 'generic_user' + Default: 'generic_user' - include_generic (bool): Whether to include a generic user ID in the output; Example: false; Default: False - include_individual (bool): Whether to include individual user IDs in the output; Example: true; - Default: False + Default: False - only_users (list): List of user IDs to include; others will be excluded; - Example: ["user1", "user2", "user3"]; - Default: [] + Example: ["user1", "user2", "user3"]; + 
Default: [] - skip_users (list): List of user IDs to exclude from the output; Example: ["user4", "user5"]; Default: [] - timestamp_column_name (str): Name of the column containing timestamps; Example: "timestamp"; - Default: 'timestamp' + Default: 'timestamp' - userid_column_name (str): Name of the column containing user IDs; Example: "username"; Default: 'username' monitor_options: - description (str): Name to show for this Monitor Stage in the console window; Example: 'Progress'; - Default: 'Progress' + Default: 'Progress' - silence_monitors (bool): Slience the monitors on the console; Example: True; Default: False - smoothing (float): Smoothing parameter to determine how much the throughput should be averaged. - 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 + 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 - unit (str): Units to show in the rate value.; Example: 'messages'; Default: 'messages' - delayed_start (bool): When delayed_start is enabled, the progress bar will not be shown until the first - message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing - immediately. In large pipelines, this option may be desired to give a more accurate timing; - Example: True; Default: False + message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing + immediately. In large pipelines, this option may be desired to give a more accurate timing; + Example: True; Default: False - determine_count_fn_schema (str): Custom function for determining the count in a message. Gets called - for each message. Allows for correct counting of batched and sliced messages.; Example: func_str; - Default: None + for each message. 
Allows for correct counting of batched and sliced messages.; Example: func_str; + Default: None - log_level (str): Enable this stage when the configured log level is at `log_level` or lower; - Example: 'DEBUG'; Default: INFO + Example: 'DEBUG'; Default: INFO write_to_file_options: - filename (str): Path to the output file; Example: `output.csv`; Default: None diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py index ea9b1fede8..27f4054143 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py @@ -51,11 +51,11 @@ def dfp_rolling_window(builder: mrc.Builder): met. Otherwise, continue to aggregate user's history.; Example: 'batch'; Default: 'batch' - trigger_on_min_history (int): Minimum history to trigger a new training event; Example: 1; Default: 1 - trigger_on_min_increment (int): Minmum increment from the last trained to new training event; - Example: 0; Default: 0 + Example: 0; Default: 0 - timestamp_column_name (str): Name of the column containing timestamps; Example: 'timestamp'; - Default: 'timestamp' + Default: 'timestamp' - aggregation_span (str): Lookback timespan for training data in a new training event; Example: '60d'; - Default: '60d' + Default: '60d' - cache_to_disk (bool): Whether to cache streaming data to disk; Example: false; Default: false - cache_dir (str): Directory to use for caching streaming data; Example: './.cache'; Default: './.cache' """ diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py index 7ec8c7f0f4..276113cfb8 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py @@ -45,14 +45,14 @@ def dfp_split_users(builder: mrc.Builder): ----- Configurable parameters: - fallback_username (str): The user 
ID to use if the user ID is not found; Example: "generic_user"; - Default: 'generic_user' + Default: 'generic_user' - include_generic (bool): Whether to include a generic user ID in the output; Example: false; Default: False - include_individual (bool): Whether to include individual user IDs in the output; Example: true; Default: False - only_users (list): List of user IDs to include; others will be excluded; Example: ["user1", "user2", "user3"]; - Default: [] + Default: [] - skip_users (list): List of user IDs to exclude from the output; Example: ["user4", "user5"]; Default: [] - timestamp_column_name (str): Name of the column containing timestamps; Example: "timestamp"; - Default: 'timestamp' + Default: 'timestamp' - userid_column_name (str): Name of the column containing user IDs; Example: "username"; Default: 'username' """ diff --git a/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py index 6bc41d1d09..20eebf5570 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py @@ -46,9 +46,9 @@ def dfp_training(builder: mrc.Builder): - feature_columns (list): List of feature columns to train on; Example: ["column1", "column2", "column3"] - epochs (int): Number of epochs to train for; Example: 50 - model_kwargs (dict): Keyword arguments to pass to the model; Example: {"encoder_layers": [64, 32], - "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, - "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, - "progress_bar": false, "device": "cpu"} + "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, + "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, + "progress_bar": false, "device": "cpu"} - validation_size (float): Size of the validation set; Example: 0.1 """ diff --git 
a/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py index cfa4c49fdc..394ae0a12b 100644 --- a/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py @@ -46,43 +46,43 @@ def dfp_training_pipe(builder: mrc.Builder): Configurable parameters: - batching_options (dict): Options for batching the data; Example: See Below - cache_dir (str): Directory to cache the rolling window data; Example: "/path/to/cache/dir"; - Default: ./.cache + Default: ./.cache - dfencoder_options (dict): Options for configuring the data frame encoder; Example: See Below - mlflow_writer_options (dict): Options for the MLflow model writer; Example: See Below - stream_aggregation_options (dict): Options for aggregating the data by stream; Example: See Below - timestamp_column_name (str): Name of the timestamp column used in the data; Example: "my_timestamp"; - Default: "timestamp" + Default: "timestamp" - user_splitting_options (dict): Options for splitting the data by user; Example: See Below - monitor_options (dict): Options for monitoring throughput; Example: See Below batching_options: - end_time (datetime/string): Endtime of the time window; Example: "2023-03-14T23:59:59"; Default: None - iso_date_regex_pattern (str): Regex pattern for ISO date matching; - Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: + Example: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"; Default: - parser_kwargs (dict): Additional arguments for the parser; Example: {}; Default: {} - period (str): Time period for grouping files; Example: "1d"; Default: "1d" - sampling_rate_s (int): Sampling rate in seconds; Example: 0; Default: None - start_time (datetime/string): Start time of the time window; Example: "2023-03-01T00:00:00"; - Default: Nome + Default: Nome dfencoder_options: - feature_columns (list): List of feature columns to train on; Example: ["column1", 
"column2", "column3"] - epochs (int): Number of epochs to train for; Example: 50 - model_kwargs (dict): Keyword arguments to pass to the model; Example: {"encoder_layers": [64, 32], - "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, - "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, - "progress_bar": false, "device": "cpu"} + "decoder_layers": [32, 64], "activation": "relu", "swap_p": 0.1, "lr": 0.001, "lr_decay": 0.9, + "batch_size": 32, "verbose": 1, "optimizer": "adam", "scalar": "min_max", "min_cats": 10, + "progress_bar": false, "device": "cpu"} - validation_size (float): Size of the validation set; Example: 0.1 mlflow_writer_options: - conda_env (str): Conda environment for the model; Example: `path/to/conda_env.yml`; - Default: `[Required]` + Default: `[Required]` - databricks_permissions (dict): Permissions for the model; Example: See Below; Default: None - experiment_name_formatter (str): Formatter for the experiment name; - Example: `experiment_name_{timestamp}`; - Default: `[Required]` + Example: `experiment_name_{timestamp}`; + Default: `[Required]` - model_name_formatter (str): Formatter for the model name; Example: `model_name_{timestamp}`; - Default: `[Required]` + Default: `[Required]` - timestamp_column_name (str): Name of the timestamp column; Example: `timestamp`; Default: timestamp stream_aggregation_options: @@ -90,44 +90,44 @@ def dfp_training_pipe(builder: mrc.Builder): are met. 
Otherwise, continue to aggregate user's history.; Example: 'batch'; Default: 'batch' - trigger_on_min_history (int): Minimum history to trigger a new training event; Example: 1; Default: 1 - trigger_on_min_increment (int): Minimum increment from the last trained to new training event; - Example: 0; Default: 0 + Example: 0; Default: 0 - timestamp_column_name (str): Name of the column containing timestamps; Example: 'timestamp'; - Default: 'timestamp' + Default: 'timestamp' - aggregation_span (str): Lookback timespan for training data in a new training event; Example: '60d'; - Default: '60d' + Default: '60d' - cache_to_disk (bool): Whether to cache streaming data to disk; Example: false; Default: false - cache_dir (str): Directory to use for caching streaming data; Example: './.cache'; Default: './.cache' user_splitting_options: - fallback_username (str): The user ID to use if the user ID is not found; Example: "generic_user"; - Default: 'generic_user' + Default: 'generic_user' - include_generic (bool): Whether to include a generic user ID in the output; Example: false; Default: False - include_individual (bool): Whether to include individual user IDs in the output; Example: true; - Default: False + Default: False - only_users (list): List of user IDs to include; others will be excluded; - Example: ["user1", "user2", "user3"]; - Default: [] + Example: ["user1", "user2", "user3"]; + Default: [] - skip_users (list): List of user IDs to exclude from the output; Example: ["user4", "user5"]; Default: [] - timestamp_column_name (str): Name of the column containing timestamps; Example: "timestamp"; - Default: 'timestamp' + Default: 'timestamp' - userid_column_name (str): Name of the column containing user IDs; Example: "username"; Default: 'username' monitor_options: - description (str): Name to show for this Monitor Stage in the console window; Example: 'Progress'; - Default: 'Progress' + Default: 'Progress' - silence_monitors (bool): Slience the monitors on the console; 
Example: True; Default: False - smoothing (float): Smoothing parameter to determine how much the throughput should be averaged. - 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 + 0 = Instantaneous, 1 = Average.; Example: 0.01; Default: 0.05 - unit (str): Units to show in the rate value.; Example: 'messages'; Default: 'messages' - delayed_start (bool): When delayed_start is enabled, the progress bar will not be shown until the first - message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing - immediately. In large pipelines, this option may be desired to give a more accurate timing; - Example: True; Default: False + message is received. Otherwise, the progress bar is shown on pipeline startup and will begin timing + immediately. In large pipelines, this option may be desired to give a more accurate timing; + Example: True; Default: False - determine_count_fn_schema (str): Custom function for determining the count in a message. Gets called - for each message. Allows for correct counting of batched and sliced messages.; Example: func_str; - Default: None + for each message. 
Allows for correct counting of batched and sliced messages.; Example: func_str; + Default: None - log_level (str): Enable this stage when the configured log level is at `log_level` or lower; - Example: 'DEBUG'; Default: INFO + Example: 'DEBUG'; Default: INFO """ # MODULE_INPUT_PORT From 2e77c3410ee1e5895a6076d68cbc3a479554bbd0 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 21 Oct 2024 01:55:20 +0000 Subject: [PATCH 3/7] Add documentation for the morpheus conda packages Signed-off-by: Anuradha Karuppiah --- docs/source/conda_packages.md | 123 +++++++++++++++++++++++++++++++++ docs/source/getting_started.md | 7 +- docs/source/index.rst | 2 + 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 docs/source/conda_packages.md diff --git a/docs/source/conda_packages.md b/docs/source/conda_packages.md new file mode 100644 index 0000000000..8ed8075ae0 --- /dev/null +++ b/docs/source/conda_packages.md @@ -0,0 +1,123 @@ +# Morpheus Conda Packages +The Morpheus stages are the building blocks for creating pipelines. The stages are organized into libraries by use case. The current libraries are: +- morpheus-core +- morpheus-dfp +- morpheus-llm + +The libraries are hosted as conda packages on the [nvidia](https://anaconda.org/nvidia/) channel. + +The split into multiple libraries allows for a more modular approach to using the Morpheus stages. For example, if you are building an application for Digital Finger Printing, you can install just the `morpheus-dfp` library. This reduces the size of the installed package. It also limits the dependencies eliminating unnecessary version conflicts. + + +## Morpheus Core +The `morpheus-core` library contains the core stages that are common across all use cases. The Morpheus core library is built from the source code in the `python/morpheus` directory of the Morpheus repository. The core library is installed as a dependency when you install any of the other Morpheus libraries. 
+To set up a conda environment with the [morpheus-core](https://anaconda.org/nvidia/morpheus-core) library you can run the following commands: +### Create a conda environment +```bash +export CONDA_ENV_NAME=morpheus +conda create -n $CONDA_ENV_NAME python=3.10 +conda activate $CONDA_ENV_NAME +``` +### Add conda channels +These channel are required for installing the runtime dependencies +```bash +conda config --env --add channels conda-forge &&\ + conda config --env --add channels nvidia &&\ + conda config --env --add channels rapidsai &&\ + conda config --env --add channels pytorch +``` +### Install the morpheus-core library +```bash +conda install -c nvidia morpheus-core +``` +The `morpheus-core` conda package installs the `morpheus` python package. It also pulls down all the necessary conda runtime dependencies for the core stages including [mrc](https://anaconda.org/nvidia/mrc) and [libmrc](https://anaconda.org/nvidia/libmrc). +### Install additional pypi dependencies +Some of the stages in the core library require additional dependencies that are hosted on Pypi. These dependencies are included as a requirements file in the morpheus python package. The requirements files can be located and installed by running the following command: +```bash +python3 < --dfp +``` +```bash +python scripts/morpheus_namespace_update.py --directory --llm +``` diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md index 9478f7d426..f97a30350e 100644 --- a/docs/source/getting_started.md +++ b/docs/source/getting_started.md @@ -19,7 +19,7 @@ limitations under the License. 
There are three ways to get started with Morpheus: - [Using pre-built Docker containers](#using-pre-built-docker-containers) -- [Using the morpheus conda packages](#using-morpheus-conda-packages) +- [Using the Morpheus Conda packages](#using-morpheus-conda-packages) - [Building the Morpheus Docker container](#building-the-morpheus-container) - [Building Morpheus from source](./developer_guide/contributing.md#building-from-source) @@ -80,7 +80,10 @@ Once launched, users wishing to launch Triton using the included Morpheus models Skip ahead to the [Acquiring the Morpheus Models Container](#acquiring-the-morpheus-models-container) section. ## Using Morpheus Conda Packages -TBD +The Morpheus stages are available as libraries that are hosted as conda packages on the [NVIDIA](https://anaconda.org/nvidia) channel. The Morpheus conda packages are: +[morpheus-core](https://anaconda.org/nvidia/morpheus-core), [morpheus-dfp](https://anaconda.org/nvidia/morpheus-dfp) and [morpheus-llm](https://anaconda.org/nvidia/morpheus-llm). + +For details on these libraries and how to use them, refer to the [Morpheus Conda Packages](./conda_packages.md) guide.
## Building the Morpheus Container ### Clone the Repository diff --git a/docs/source/index.rst index fae48cc6b0..dce4a88bfd 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -54,6 +54,7 @@ Getting Started Using Morpheus ^^^^^^^^^^^^^^ * :doc:`getting_started` - Using pre-built Docker containers, building Docker containers from source, and fetching models and datasets + * :doc:`Morpheus Conda Packages <conda_packages>` - Using Morpheus Libraries via the pre-built Conda Packages * :doc:`basics/overview` - Brief overview of the command line interface * :doc:`basics/building_a_pipeline` - Introduction to building a pipeline using the command line interface * :doc:`Morpheus Examples <examples>` - Example pipelines using both the Python API and command line interface @@ -76,6 +77,7 @@ Deploying Morpheus :hidden: getting_started + conda_packages basics/overview basics/building_a_pipeline models_and_datasets From 8b40cb28f2a2b3fd9afeddf22be5f6c653d7ec82 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 21 Oct 2024 20:04:49 +0000 Subject: [PATCH 4/7] Add copyrights header Signed-off-by: Anuradha Karuppiah --- docs/source/conda_packages.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/source/conda_packages.md index 8ed8075ae0..b008d52184 100644 --- a/docs/source/conda_packages.md +++ b/docs/source/conda_packages.md @@ -1,3 +1,20 @@ + + # Morpheus Conda Packages The Morpheus stages are the building blocks for creating pipelines. The stages are organized into libraries by use case.
The current libraries are: - morpheus-core From dba406cbb6e2375ca03dbdbe7165e9ae1ac4b4ee Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 21 Oct 2024 21:38:23 +0000 Subject: [PATCH 5/7] Fix vale syntax check failures Signed-off-by: Anuradha Karuppiah --- docs/source/conda_packages.md | 54 +++++++++++++++++----------------- docs/source/getting_started.md | 4 +-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/docs/source/conda_packages.md b/docs/source/conda_packages.md index b008d52184..ff3a03d83b 100644 --- a/docs/source/conda_packages.md +++ b/docs/source/conda_packages.md @@ -17,25 +17,25 @@ limitations under the License. # Morpheus Conda Packages The Morpheus stages are the building blocks for creating pipelines. The stages are organized into libraries by use case. The current libraries are: -- morpheus-core -- morpheus-dfp -- morpheus-llm +- `morpheus-core` +- `morpheus-dfp` +- `morpheus-llm` -The libraries are hosted as conda packages on the [nvidia](https://anaconda.org/nvidia/) channel. +The libraries are hosted as Conda packages on the [`nvidia`](https://anaconda.org/nvidia/) channel. The split into multiple libraries allows for a more modular approach to using the Morpheus stages. For example, if you are building an application for Digital Finger Printing, you can install just the `morpheus-dfp` library. This reduces the size of the installed package. It also limits the dependencies eliminating unnecessary version conflicts. ## Morpheus Core The `morpheus-core` library contains the core stages that are common across all use cases. The Morpheus core library is built from the source code in the `python/morpheus` directory of the Morpheus repository. The core library is installed as a dependency when you install any of the other Morpheus libraries. 
-To set up a conda environment with the [morpheus-core](https://anaconda.org/nvidia/morpheus-core) library you can run the following commands: -### Create a conda environment +To set up a Conda environment with the [`morpheus-core`](https://anaconda.org/nvidia/morpheus-core) library you can run the following commands: +### Create a Conda environment ```bash export CONDA_ENV_NAME=morpheus conda create -n $CONDA_ENV_NAME python=3.10 conda activate $CONDA_ENV_NAME ``` -### Add conda channels +### Add Conda channels These channel are required for installing the runtime dependencies ```bash conda config --env --add channels conda-forge &&\ @@ -43,13 +43,13 @@ conda config --env --add channels conda-forge &&\ conda config --env --add channels rapidsai &&\ conda config --env --add channels pytorch ``` -### Install the morpheus-core library +### Install the `morpheus-core` library ```bash conda install -c nvidia morpheus-core ``` -The `morpheus-core` conda package installs the `morpheus` python package. It also pulls down all the necessary conda runtime dependencies for the core stages including [mrc](https://anaconda.org/nvidia/mrc) and [libmrc](https://anaconda.org/nvidia/libmrc). -### Install additional pypi dependencies -Some of the stages in the core library require additional dependencies that are hosted on Pypi. These dependencies are included as a requirements file in the morpheus python package. The requirements files can be located and installed by running the following command: +The `morpheus-core` Conda package installs the `morpheus` python package. It also pulls down all the necessary Conda runtime dependencies for the core stages including [`mrc`](https://anaconda.org/nvidia/mrc) and [`libmrc`](https://anaconda.org/nvidia/libmrc). +### Install additional `Pypi` dependencies +Some of the stages in the core library require additional dependencies that are hosted on `Pypi`. These dependencies are included as a requirements file in the `morpheus` python package. 
The requirements files can be located and installed by running the following command: ```bash python3 < Date: Mon, 21 Oct 2024 23:00:54 +0000 Subject: [PATCH 6/7] Change "Pypi" to "PyPI" and add a vale accept keyword for it Signed-off-by: Anuradha Karuppiah --- .../styles/config/vocabularies/morpheus/accept.txt | 1 + docs/source/conda_packages.md | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ci/vale/styles/config/vocabularies/morpheus/accept.txt b/ci/vale/styles/config/vocabularies/morpheus/accept.txt index 285a85c7d8..91dfb44e43 100644 --- a/ci/vale/styles/config/vocabularies/morpheus/accept.txt +++ b/ci/vale/styles/config/vocabularies/morpheus/accept.txt @@ -46,6 +46,7 @@ LLM(s?) # https://github.com/logpai/loghub/ Loghub Milvus +PyPI [Mm]ixin MLflow Morpheus diff --git a/docs/source/conda_packages.md b/docs/source/conda_packages.md index ff3a03d83b..3759bd196a 100644 --- a/docs/source/conda_packages.md +++ b/docs/source/conda_packages.md @@ -48,8 +48,8 @@ conda config --env --add channels conda-forge &&\ conda install -c nvidia morpheus-core ``` The `morpheus-core` Conda package installs the `morpheus` python package. It also pulls down all the necessary Conda runtime dependencies for the core stages including [`mrc`](https://anaconda.org/nvidia/mrc) and [`libmrc`](https://anaconda.org/nvidia/libmrc). -### Install additional `Pypi` dependencies -Some of the stages in the core library require additional dependencies that are hosted on `Pypi`. These dependencies are included as a requirements file in the `morpheus` python package. The requirements files can be located and installed by running the following command: +### Install additional PyPI dependencies +Some of the stages in the core library require additional dependencies that are hosted on PyPI. These dependencies are included as a requirements file in the `morpheus` python package. 
The requirements files can be located and installed by running the following command: ```bash python3 < Date: Mon, 21 Oct 2024 23:23:51 +0000 Subject: [PATCH 7/7] Make the pypi package install more bash friendly Signed-off-by: Anuradha Karuppiah --- docs/source/conda_packages.md | 36 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/docs/source/conda_packages.md b/docs/source/conda_packages.md index 3759bd196a..1ea5f65828 100644 --- a/docs/source/conda_packages.md +++ b/docs/source/conda_packages.md @@ -32,8 +32,8 @@ To set up a Conda environment with the [`morpheus-core`](https://anaconda.org/nv ### Create a Conda environment ```bash export CONDA_ENV_NAME=morpheus -conda create -n $CONDA_ENV_NAME python=3.10 -conda activate $CONDA_ENV_NAME +conda create -n ${CONDA_ENV_NAME} python=3.10 +conda activate ${CONDA_ENV_NAME} ``` ### Add Conda channels These channel are required for installing the runtime dependencies @@ -51,12 +51,8 @@ The `morpheus-core` Conda package installs the `morpheus` python package. It als ### Install additional PyPI dependencies Some of the stages in the core library require additional dependencies that are hosted on PyPI. These dependencies are included as a requirements file in the `morpheus` python package. The requirements files can be located and installed by running the following command: ```bash -python3 <