From f2aa957df9c512c34d8127e76a952eede0751b46 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 1 Nov 2024 13:01:38 -0700 Subject: [PATCH 01/12] Display the diff report on column differences not just rows --- python/morpheus/morpheus/utils/compare_df.py | 5 +++-- scripts/compare_data_files.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/morpheus/morpheus/utils/compare_df.py b/python/morpheus/morpheus/utils/compare_df.py index 605b515edf..5541e0ecd4 100755 --- a/python/morpheus/morpheus/utils/compare_df.py +++ b/python/morpheus/morpheus/utils/compare_df.py @@ -130,6 +130,7 @@ def compare_df(df_a: pd.DataFrame, total_rows = len(df_a_filtered) diff_rows = len(df_a_filtered) - int(comparison.count_matching_rows()) + diff_cols = len(extra_columns) + len(missing_columns) if (comparison.matches()): logger.info("Results match validation dataset") @@ -141,7 +142,7 @@ def compare_df(df_a: pd.DataFrame, mismatch_df = merged.loc[mismatched_idx] - if diff_rows > 0: + if diff_rows > 0 or diff_cols > 0: logger.debug("Results do not match. Diff %d/%d (%f %%). 
First 10 mismatched rows:", diff_rows, total_rows, @@ -160,5 +161,5 @@ def compare_df(df_a: pd.DataFrame, "matching_cols": list(same_columns), "extra_cols": list(extra_columns), "missing_cols": list(missing_columns), - "diff_cols": len(extra_columns) + len(missing_columns) + "diff_cols": diff_cols } diff --git a/scripts/compare_data_files.py b/scripts/compare_data_files.py index b1a53f4fa1..3250d9d65b 100755 --- a/scripts/compare_data_files.py +++ b/scripts/compare_data_files.py @@ -66,7 +66,7 @@ def main(): abs_tol=args.abs_tol, rel_tol=args.rel_tol) - if results['diff_rows'] > 0: + if results['diff_rows'] > 0 or results['diff_cols'] > 0: sys.exit(1) From 2deaddd660ec24ec51b6883bf1c79525033835bc Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 10:51:36 -0800 Subject: [PATCH 02/12] Give each example pipeline a unique output file name --- .../developer_guide/guides/2_real_world_phishing.md | 9 ++++----- examples/abp_nvsmi_detection/README.md | 4 ++-- examples/abp_pcap_detection/README.md | 2 +- examples/developer_guide/2_1_real_world_phishing/run.py | 3 +-- examples/gnn_fraud_detection_pipeline/README.md | 2 +- examples/gnn_fraud_detection_pipeline/run.py | 2 +- examples/log_parsing/README.md | 2 +- examples/nlp_si_detection/README.md | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index c460af3e02..538c47a717 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -482,9 +482,10 @@ To explicitly set the output format we could specify the `file_type` argument to ```python import logging import os -import tempfile import click +from recipient_features_stage import RecipientFeaturesStage +from recipient_features_stage_deco import recipient_features_stage import morpheus from morpheus.config import Config @@ -499,8 +500,6 @@ 
from morpheus.stages.postprocess.serialize_stage import SerializeStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage from morpheus.utils.logger import configure_logging -from recipient_features_stage import RecipientFeaturesStage -from recipient_features_stage_deco import recipient_features_stage MORPHEUS_ROOT = os.environ['MORPHEUS_ROOT'] @@ -542,7 +541,7 @@ MORPHEUS_ROOT = os.environ['MORPHEUS_ROOT'] @click.option("--server_url", default='localhost:8000', help="Tritonserver url.") @click.option( "--output_file", - default=os.path.join(tempfile.gettempdir(), "detections.jsonlines"), + default="phishing_detections.jsonlines", help="The path to the file where the inference output will be saved.", ) def run_pipeline(use_stage_function: bool, @@ -633,7 +632,7 @@ morpheus --log_level=debug --plugin examples/developer_guide/2_1_real_world_phis monitor --description="Inference Rate" --smoothing=0.001 --unit=inf \ add-scores --label=is_phishing \ serialize \ - to-file --filename=/tmp/detections.jsonlines --overwrite + to-file --filename=phishing_detections_cli.jsonlines --overwrite ``` ## Stage Constructors diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 244d729420..bf14eb0458 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -140,7 +140,7 @@ morpheus --log_level=DEBUG \ `# 7th Stage: Convert from objects back into strings. 
Ignore verbose input data` \ serialize --include 'mining' \ `# 8th Stage: Write out the JSON lines to the detections.jsonlines file` \ - to-file --filename=detections.jsonlines --overwrite + to-file --filename=abp_nvsmi_detections.jsonlines --overwrite ``` If successful, the following should be displayed: @@ -217,7 +217,7 @@ Added stage: morpheus.ControlMessage Added stage: └─ morpheus.ControlMessage -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== Starting! Time: 1656353254.9919598 diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index 6dc63212af..8eb0a44d2c 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -118,6 +118,6 @@ morpheus --log_level INFO --plugin "examples/abp_pcap_detection/abp_pcap_preproc monitor --description "Add classification rate" --unit "add-class" \ serialize \ monitor --description "Serialize rate" --unit ser \ - to-file --filename "pcap_out.jsonlines" --overwrite \ + to-file --filename "pcap_out_cli.jsonlines" --overwrite \ monitor --description "Write to file rate" --unit "to-file" ``` diff --git a/examples/developer_guide/2_1_real_world_phishing/run.py b/examples/developer_guide/2_1_real_world_phishing/run.py index 64ae7d77dc..a81bbd95a2 100755 --- a/examples/developer_guide/2_1_real_world_phishing/run.py +++ b/examples/developer_guide/2_1_real_world_phishing/run.py @@ -17,7 +17,6 @@ import logging import os -import tempfile import click from recipient_features_stage import RecipientFeaturesStage @@ -77,7 +76,7 @@ @click.option("--server_url", default='localhost:8000', help="Tritonserver url.") @click.option( "--output_file", - default=os.path.join(tempfile.gettempdir(), "detections.jsonlines"), + default="phishing_detections.jsonlines", help="The path to the file where the inference output will be saved.", ) def run_pipeline(use_stage_function: bool, diff --git 
a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index c7206787a6..ac2e7ec773 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -128,5 +128,5 @@ morpheus --log_level INFO \ gnn-fraud-classification --model_xgb_file examples/gnn_fraud_detection_pipeline/model/xgb.pt \ monitor --description "Add classification rate" \ serialize \ - to-file --filename "output.csv" --overwrite + to-file --filename "gnn_fraud_detection_cli_output.csv" --overwrite ``` diff --git a/examples/gnn_fraud_detection_pipeline/run.py b/examples/gnn_fraud_detection_pipeline/run.py index a5de019ed7..48364e13e2 100644 --- a/examples/gnn_fraud_detection_pipeline/run.py +++ b/examples/gnn_fraud_detection_pipeline/run.py @@ -84,7 +84,7 @@ @click.option( "--output_file", type=click.Path(dir_okay=False), - default="output.csv", + default="gnn_fraud_detection_output.csv", help="The path to the file where the inference output will be saved.", ) def run_pipeline(num_threads, diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index 5d2485a3bc..62d4ba53c7 100644 --- a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -119,6 +119,6 @@ morpheus --log_level INFO \ monitor --description "Inference rate" --unit inf \ log-postprocess --vocab_path ./models/training-tuning-scripts/sid-models/resources/bert-base-cased-vocab.txt \ --model_config_path=./models/log-parsing-models/log-parsing-config-20220418.json \ - to-file --filename ./log-parsing-output.jsonlines --overwrite \ + to-file --filename ./log-parsing-cli-output.jsonlines --overwrite \ monitor --description "Postprocessing rate" ``` diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 37d4abfa1f..d12fb6a6c0 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -132,7 +132,7 @@ morpheus --log_level=DEBUG \ `# 8th 
Stage: Convert from objects back into strings` \ serialize --exclude '^_ts_' \ `# 9th Stage: Write out the JSON lines to the detections.jsonlines file` \ - to-file --filename=detections.jsonlines --overwrite + to-file --filename=nlp_si_detections.jsonlines --overwrite ``` If successful, the following should be displayed: From 63e7a46801e99ea53c6bdf534ee2e8d01b3eeea3 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 11:17:05 -0800 Subject: [PATCH 03/12] Ignore output files from example pipelines --- .gitignore | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index 064d3cf759..6165e7a078 100755 --- a/.gitignore +++ b/.gitignore @@ -213,3 +213,18 @@ examples/digital_fingerprinting/visualization/*-dfp-output/ # Ignore saved benchmark data tests/benchmarks/.benchmarks/ examples/digital_fingerprinting/production/benchmarks/.benchmarks/ + +# Ignore output files from examples +/abp_nvsmi_detections.jsonlines +/detections.jsonlines +/gnn_fraud_detection_cli_output.csv +/gnn_fraud_detection_output.csv +/log-parsing-output.jsonlines +/log-parsing-output_cli.jsonlines +/nlp_si_detections.jsonlines +/pcap_out.jsonlines +/pcap_out_cli.jsonlines +/phishing_detections.jsonlines +/phishing_detections_cli.jsonlines +/ransomware_detection_output.jsonlines +/root-cause-binary-output.jsonlines From 8df81086476431f650b73d57e6710f83a019ffd1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 11:40:20 -0800 Subject: [PATCH 04/12] Fix output file name for log parsing --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6165e7a078..e5ec5b6991 100755 --- a/.gitignore +++ b/.gitignore @@ -220,7 +220,7 @@ examples/digital_fingerprinting/production/benchmarks/.benchmarks/ /gnn_fraud_detection_cli_output.csv /gnn_fraud_detection_output.csv /log-parsing-output.jsonlines -/log-parsing-output_cli.jsonlines +/log-parsing-cli-output.jsonlines /nlp_si_detections.jsonlines 
/pcap_out.jsonlines /pcap_out_cli.jsonlines From db07ec16547dba7f68275a970138b9502607bc57 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 13:04:09 -0800 Subject: [PATCH 05/12] Add a stage to create a unique output field by concatenating the timestamp and pid_process columns. Add a `--pipeline_batch_size` flag defaulting to the `model_max_batch_size` avoiding a config warning Add type hints for `run_pipeline` function arguments --- examples/ransomware_detection/run.py | 47 +++++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/examples/ransomware_detection/run.py b/examples/ransomware_detection/run.py index 4887e7ff1b..410cb4386a 100644 --- a/examples/ransomware_detection/run.py +++ b/examples/ransomware_detection/run.py @@ -21,9 +21,12 @@ from stages.create_features import CreateFeaturesRWStage from stages.preprocessing import PreprocessingRWStage +from morpheus.common import TypeId from morpheus.config import Config from morpheus.config import PipelineModes +from morpheus.messages import MessageMeta from morpheus.pipeline.linear_pipeline import LinearPipeline +from morpheus.pipeline.stage_decorator import stage from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage from morpheus.stages.input.appshield_source_stage import AppShieldSourceStage @@ -61,6 +64,12 @@ type=click.IntRange(min=1), help="Max batch size to use for the model.", ) +@click.option( + "--pipeline_batch_size", + default=1024, + type=click.IntRange(min=1), + help=("Internal batch size for the pipeline. 
Can be much larger than the model batch size."), +) @click.option( "--conf_file", type=click.STRING, @@ -98,18 +107,19 @@ default="./ransomware_detection_output.jsonlines", help="The path to the file where the inference output will be saved.", ) -def run_pipeline(debug, - num_threads, - n_dask_workers, - threads_per_dask_worker, - model_max_batch_size, - conf_file, - model_name, - server_url, - sliding_window, - input_glob, - watch_directory, - output_file): +def run_pipeline(debug: bool, + num_threads: int, + n_dask_workers: int, + threads_per_dask_worker: int, + model_max_batch_size: int, + pipeline_batch_size: int, + conf_file: str, + model_name: str, + server_url: str, + sliding_window: int, + input_glob: str, + watch_directory: bool, + output_file: str): if debug: configure_logging(log_level=logging.DEBUG) @@ -125,6 +135,7 @@ def run_pipeline(debug, # Below properties are specified by the command line. config.num_threads = num_threads config.model_max_batch_size = model_max_batch_size + config.pipeline_batch_size = pipeline_batch_size config.feature_length = snapshot_fea_length * sliding_window config.class_labels = ["pred", "score"] @@ -222,6 +233,18 @@ def run_pipeline(debug, # This stage logs the metrics (msg/sec) from the above stage. pipeline.add_stage(MonitorStage(config, description="Serialize rate")) + @stage(needed_columns={'timestamp_process': TypeId.STRING}) + def concat_columns(msg: MessageMeta) -> MessageMeta: + """ + This stage concatenates the timestamp and pid_process columns to create a unique field. + """ + with msg.mutable_dataframe() as df: + df['timestamp_process'] = df['timestamp'] + df['pid_process'] + + return msg + + pipeline.add_stage(concat_columns(config)) + # Add a write file stage. # This stage writes all messages to a file. 
pipeline.add_stage(WriteToFileStage(config, filename=output_file, overwrite=True)) From 88dca9599bd643420de90ec9eb675605b82b4fde Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 13:10:29 -0800 Subject: [PATCH 06/12] Call _validate_config() from Config.freeze() instead of the attribute setters, avoids issue where setting attributes becomes order dependent --- python/morpheus/morpheus/config.py | 3 +-- tests/morpheus/test_config.py | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/morpheus/morpheus/config.py b/python/morpheus/morpheus/config.py index 2bc589a186..16ba6ed86d 100644 --- a/python/morpheus/morpheus/config.py +++ b/python/morpheus/morpheus/config.py @@ -235,6 +235,7 @@ def freeze(self): """ self._check_cpp_mode(fix_mis_match=not self.frozen) if not self.frozen: + self._validate_config() self.frozen = True def _check_cpp_mode(self, fix_mis_match: bool = False): @@ -267,7 +268,6 @@ def pipeline_batch_size(self): @pipeline_batch_size.setter def pipeline_batch_size(self, value: int): self._pipeline_batch_size = value - self._validate_config() @property def model_max_batch_size(self): @@ -276,7 +276,6 @@ def model_max_batch_size(self): @model_max_batch_size.setter def model_max_batch_size(self, value: int): self._model_max_batch_size = value - self._validate_config() def _validate_config(self): if self._pipeline_batch_size < self._model_max_batch_size: diff --git a/tests/morpheus/test_config.py b/tests/morpheus/test_config.py index 746acf3771..9ebce02c8b 100755 --- a/tests/morpheus/test_config.py +++ b/tests/morpheus/test_config.py @@ -159,6 +159,7 @@ def test_warning_model_batch_size_less_than_pipeline_batch_size(caplog: pytest.L config.pipeline_batch_size = 256 with caplog.at_level(logging.WARNING): config.model_max_batch_size = 257 + config.freeze() assert len(caplog.records) == 1 import re assert re.match(".*pipeline_batch_size < model_max_batch_size.*", caplog.records[0].message) is not None @@ -169,6 +170,7 @@ 
def test_warning_pipeline_batch_size_less_than_model_batch_size(caplog: pytest.L config.model_max_batch_size = 8 with caplog.at_level(logging.WARNING): config.pipeline_batch_size = 7 + config.freeze() assert len(caplog.records) == 1 import re assert re.match(".*pipeline_batch_size < model_max_batch_size.*", caplog.records[0].message) is not None From 450149f6a6e9f00cbd5b533dec801cfcf4023367 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 13:30:37 -0800 Subject: [PATCH 07/12] Revert import order --- docs/source/developer_guide/guides/2_real_world_phishing.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index 538c47a717..536f8a195c 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -484,8 +484,6 @@ import logging import os import click -from recipient_features_stage import RecipientFeaturesStage -from recipient_features_stage_deco import recipient_features_stage import morpheus from morpheus.config import Config @@ -500,6 +498,8 @@ from morpheus.stages.postprocess.serialize_stage import SerializeStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage from morpheus.utils.logger import configure_logging +from recipient_features_stage import RecipientFeaturesStage +from recipient_features_stage_deco import recipient_features_stage MORPHEUS_ROOT = os.environ['MORPHEUS_ROOT'] From 001ca3bc1366ce23180ee1d5edec4754c06a73a5 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 13:33:31 -0800 Subject: [PATCH 08/12] Fix output file in logs --- examples/gnn_fraud_detection_pipeline/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gnn_fraud_detection_pipeline/README.md 
b/examples/gnn_fraud_detection_pipeline/README.md index ac2e7ec773..5354128419 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -97,7 +97,7 @@ Added stage: morpheus.MessageMeta Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Segment Complete!==== Graph construction rate[Complete]: 265 messages [00:00, 1016.18 messages/s] From d8a563a1dae7d565a93d2220d0ec0a38be65dbe5 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 7 Nov 2024 13:35:46 -0800 Subject: [PATCH 09/12] Update output filename --- examples/nlp_si_detection/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index d12fb6a6c0..24f94ad15b 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -131,7 +131,7 @@ morpheus --log_level=DEBUG \ filter --filter_source=TENSOR \ `# 8th Stage: Convert from objects back into strings` \ serialize --exclude '^_ts_' \ - `# 9th Stage: Write out the JSON lines to the detections.jsonlines file` \ + `# 9th Stage: Write out the JSON lines to the nlp_si_detections.jsonlines file` \ to-file --filename=nlp_si_detections.jsonlines --overwrite ``` @@ -187,7 +187,7 @@ Added stage: morpheus.ControlMessage Added stage: └─ morpheus.ControlMessage -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== Starting! 
Time: 1656352480.541071 @@ -196,7 +196,7 @@ Inference Rate[Complete]: 93085inf [00:07, 12673.63inf/s] ``` -The output file `detections.jsonlines` will contain the original PCAP messages with the following additional fields added: +The output file `nlp_si_detections.jsonlines` will contain the original PCAP messages with the following additional fields added: * `address` * `bank_acct` * `credit_card` From 510a90dccc93734354c51d9065d488ae0a30dbcc Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 21 Nov 2024 13:17:49 -0800 Subject: [PATCH 10/12] Remove ignores for example output files since this will be moved to .tmp --- .gitignore | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/.gitignore b/.gitignore index e5ec5b6991..064d3cf759 100755 --- a/.gitignore +++ b/.gitignore @@ -213,18 +213,3 @@ examples/digital_fingerprinting/visualization/*-dfp-output/ # Ignore saved benchmark data tests/benchmarks/.benchmarks/ examples/digital_fingerprinting/production/benchmarks/.benchmarks/ - -# Ignore output files from examples -/abp_nvsmi_detections.jsonlines -/detections.jsonlines -/gnn_fraud_detection_cli_output.csv -/gnn_fraud_detection_output.csv -/log-parsing-output.jsonlines -/log-parsing-cli-output.jsonlines -/nlp_si_detections.jsonlines -/pcap_out.jsonlines -/pcap_out_cli.jsonlines -/phishing_detections.jsonlines -/phishing_detections_cli.jsonlines -/ransomware_detection_output.jsonlines -/root-cause-binary-output.jsonlines From c65c4424bf7e9730c7f257fc00eaaae613ec4919 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 21 Nov 2024 14:54:03 -0800 Subject: [PATCH 11/12] Move default output dir to .tmp/output --- docs/source/developer_guide/guides/2_real_world_phishing.md | 4 ++-- examples/abp_nvsmi_detection/README.md | 6 +++--- examples/abp_pcap_detection/README.md | 4 ++-- examples/abp_pcap_detection/run.py | 2 +- examples/cpu_only/README.md | 4 ++-- examples/cpu_only/run.py | 2 +- examples/developer_guide/2_1_real_world_phishing/run.py | 2 +- 
examples/gnn_fraud_detection_pipeline/README.md | 2 +- examples/gnn_fraud_detection_pipeline/run.py | 2 +- examples/log_parsing/README.md | 2 +- examples/log_parsing/run.py | 2 +- examples/nlp_si_detection/README.md | 4 ++-- examples/nlp_si_detection/run.sh | 2 +- examples/ransomware_detection/README.md | 2 +- examples/ransomware_detection/run.py | 2 +- examples/root_cause_analysis/README.md | 6 +++--- 16 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index 536f8a195c..61ba218fd5 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -541,7 +541,7 @@ MORPHEUS_ROOT = os.environ['MORPHEUS_ROOT'] @click.option("--server_url", default='localhost:8000', help="Tritonserver url.") @click.option( "--output_file", - default="phishing_detections.jsonlines", + default=".tmp/output/phishing_detections.jsonlines", help="The path to the file where the inference output will be saved.", ) def run_pipeline(use_stage_function: bool, @@ -632,7 +632,7 @@ morpheus --log_level=debug --plugin examples/developer_guide/2_1_real_world_phis monitor --description="Inference Rate" --smoothing=0.001 --unit=inf \ add-scores --label=is_phishing \ serialize \ - to-file --filename=phishing_detections_cli.jsonlines --overwrite + to-file --filename=.tmp/output/phishing_detections_cli.jsonlines --overwrite ``` ## Stage Constructors diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index bf14eb0458..f7fed3e260 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -140,7 +140,7 @@ morpheus --log_level=DEBUG \ `# 7th Stage: Convert from objects back into strings. 
Ignore verbose input data` \ serialize --include 'mining' \ `# 8th Stage: Write out the JSON lines to the detections.jsonlines file` \ - to-file --filename=abp_nvsmi_detections.jsonlines --overwrite + to-file --filename=.tmp/output/abp_nvsmi_detections.jsonlines --overwrite ``` If successful, the following should be displayed: @@ -217,7 +217,7 @@ Added stage: morpheus.ControlMessage Added stage: └─ morpheus.ControlMessage -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== Starting! Time: 1656353254.9919598 @@ -225,7 +225,7 @@ Inference Rate[Complete]: 1242inf [00:00, 1863.04inf/s] ====Pipeline Complete==== ``` -The output file `detections.jsonlines` will contain a single boolean value for each input line. At some point the values will switch from `0` to `1`: +The output file `.tmp/output/abp_nvsmi_detections.jsonlines` will contain a single boolean value for each input line. At some point the values will switch from `0` to `1`: ``` ... diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index 8eb0a44d2c..9c04f5e68f 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -97,7 +97,7 @@ python examples/abp_pcap_detection/run.py ``` Note: Both Morpheus and Triton Inference Server containers must have access to the same GPUs in order for this example to work. -The pipeline will process the input `abp_pcap_dump.jsonlines` sample data and write it to `pcap_out.jsonlines`. +The pipeline will process the input `abp_pcap_dump.jsonlines` sample data and write it to `.tmp/output/pcap_out.jsonlines`. ### CLI Example The above example is illustrative of using the Python API to build a custom Morpheus Pipeline. 
@@ -118,6 +118,6 @@ morpheus --log_level INFO --plugin "examples/abp_pcap_detection/abp_pcap_preproc monitor --description "Add classification rate" --unit "add-class" \ serialize \ monitor --description "Serialize rate" --unit ser \ - to-file --filename "pcap_out_cli.jsonlines" --overwrite \ + to-file --filename ".tmp/output/pcap_out_cli.jsonlines" --overwrite \ monitor --description "Write to file rate" --unit "to-file" ``` diff --git a/examples/abp_pcap_detection/run.py b/examples/abp_pcap_detection/run.py index b1a654bbd9..24405bad3c 100644 --- a/examples/abp_pcap_detection/run.py +++ b/examples/abp_pcap_detection/run.py @@ -65,7 +65,7 @@ ) @click.option( "--output_file", - default="./pcap_out.jsonlines", + default="./.tmp/output/pcap_out.jsonlines", help="The path to the file where the inference output will be saved.", ) @click.option( diff --git a/examples/cpu_only/README.md b/examples/cpu_only/README.md index feac382a3f..3e8abd3233 100644 --- a/examples/cpu_only/README.md +++ b/examples/cpu_only/README.md @@ -53,7 +53,7 @@ Options: To launch the configured Morpheus pipeline with the sample data that is provided in `examples/data`, run the following: ```bash -python examples/cpu_only/run.py --use_cpu_only --in_file=examples/data/email.jsonlines --out_file=.tmp/out.jsonlines +python examples/cpu_only/run.py --use_cpu_only --in_file=examples/data/email.jsonlines --out_file=.tmp/output/cpu_only_out.jsonlines ``` ### CLI Example @@ -68,5 +68,5 @@ morpheus --log_level INFO \ deserialize \ monitor --description "deserialize" \ serialize \ - to-file --filename=.tmp/out.jsonlines --overwrite + to-file --filename=.tmp/output/cpu_only_cli_out.jsonlines --overwrite ``` diff --git a/examples/cpu_only/run.py b/examples/cpu_only/run.py index f0a50a47e0..7cbc96a440 100644 --- a/examples/cpu_only/run.py +++ b/examples/cpu_only/run.py @@ -61,7 +61,7 @@ "--out_file", help="Output file", type=click.Path(dir_okay=False), - default="output.csv", + 
default=".tmp/output/cpu_only_out.csv", required=True, ) def run_pipeline(log_level: int, use_cpu_only: bool, in_file: pathlib.Path, out_file: pathlib.Path): diff --git a/examples/developer_guide/2_1_real_world_phishing/run.py b/examples/developer_guide/2_1_real_world_phishing/run.py index a81bbd95a2..8f83fade5c 100755 --- a/examples/developer_guide/2_1_real_world_phishing/run.py +++ b/examples/developer_guide/2_1_real_world_phishing/run.py @@ -76,7 +76,7 @@ @click.option("--server_url", default='localhost:8000', help="Tritonserver url.") @click.option( "--output_file", - default="phishing_detections.jsonlines", + default=".tmp/output/phishing_detections.jsonlines", help="The path to the file where the inference output will be saved.", ) def run_pipeline(use_stage_function: bool, diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index 5354128419..9110cc27ff 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -97,7 +97,7 @@ Added stage: morpheus.MessageMeta Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Segment Complete!==== Graph construction rate[Complete]: 265 messages [00:00, 1016.18 messages/s] diff --git a/examples/gnn_fraud_detection_pipeline/run.py b/examples/gnn_fraud_detection_pipeline/run.py index 48364e13e2..73c3301d47 100644 --- a/examples/gnn_fraud_detection_pipeline/run.py +++ b/examples/gnn_fraud_detection_pipeline/run.py @@ -84,7 +84,7 @@ @click.option( "--output_file", type=click.Path(dir_okay=False), - default="gnn_fraud_detection_output.csv", + default=".tmp/output/gnn_fraud_detection_output.csv", help="The path to the file where the inference output will be saved.", ) def run_pipeline(num_threads, diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index 62d4ba53c7..19cda49a66 100644 --- 
a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -119,6 +119,6 @@ morpheus --log_level INFO \ monitor --description "Inference rate" --unit inf \ log-postprocess --vocab_path ./models/training-tuning-scripts/sid-models/resources/bert-base-cased-vocab.txt \ --model_config_path=./models/log-parsing-models/log-parsing-config-20220418.json \ - to-file --filename ./log-parsing-cli-output.jsonlines --overwrite \ + to-file --filename .tmp/output/log-parsing-cli-output.jsonlines --overwrite \ monitor --description "Postprocessing rate" ``` diff --git a/examples/log_parsing/run.py b/examples/log_parsing/run.py index a85379f166..20e836c4c8 100644 --- a/examples/log_parsing/run.py +++ b/examples/log_parsing/run.py @@ -60,7 +60,7 @@ ) @click.option( "--output_file", - default="log-parsing-output.jsonlines", + default=".tmp/output/log-parsing-output.jsonlines", help="The path to the file where the inference output will be saved.", ) @click.option('--model_vocab_hash_file', diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 24f94ad15b..4efdf1df6d 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -132,7 +132,7 @@ morpheus --log_level=DEBUG \ `# 8th Stage: Convert from objects back into strings` \ serialize --exclude '^_ts_' \ `# 9th Stage: Write out the JSON lines to the nlp_si_detections.jsonlines file` \ - to-file --filename=nlp_si_detections.jsonlines --overwrite + to-file --filename=.tmp/output/nlp_si_detections.jsonlines --overwrite ``` If successful, the following should be displayed: @@ -187,7 +187,7 @@ Added stage: morpheus.ControlMessage Added stage: └─ morpheus.ControlMessage -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== Starting! 
Time: 1656352480.541071 diff --git a/examples/nlp_si_detection/run.sh b/examples/nlp_si_detection/run.sh index 390418e545..d88f6a8ffb 100755 --- a/examples/nlp_si_detection/run.sh +++ b/examples/nlp_si_detection/run.sh @@ -29,4 +29,4 @@ morpheus --log_level=DEBUG \ add-class \ filter --filter_source=TENSOR \ serialize --exclude '^_ts_' \ - to-file --filename=detections.jsonlines --overwrite + to-file --filename=.tmp/output/nlp_si_detections.jsonlines --overwrite diff --git a/examples/ransomware_detection/README.md b/examples/ransomware_detection/README.md index e1f7197e1e..226dba098d 100644 --- a/examples/ransomware_detection/README.md +++ b/examples/ransomware_detection/README.md @@ -72,7 +72,7 @@ python examples/ransomware_detection/run.py --server_url=localhost:8000 \ --sliding_window=3 \ --model_name=ransomw-model-short-rf \ --input_glob=./examples/data/appshield/*/snapshot-*/*.json \ - --output_file=./ransomware_detection_output.jsonlines + --output_file=.tmp/output/ransomware_detection_output.jsonlines ``` Input features for a short model can be taken from every three snapshots sequence, such as (1, 2, 3), or (2, 3, 4). The sliding window represents the number of subsequent snapshots that need to be taken into consideration when generating the input for a model. Sliding window for the medium model is `5` and for the long model it is `10`. 
diff --git a/examples/ransomware_detection/run.py b/examples/ransomware_detection/run.py index 410cb4386a..7bc8dbf487 100644 --- a/examples/ransomware_detection/run.py +++ b/examples/ransomware_detection/run.py @@ -104,7 +104,7 @@ @click.option( "--output_file", type=click.STRING, - default="./ransomware_detection_output.jsonlines", + default=".tmp/output/ransomware_detection_output.jsonlines", help="The path to the file where the inference output will be saved.", ) def run_pipeline(debug: bool, diff --git a/examples/root_cause_analysis/README.md b/examples/root_cause_analysis/README.md index 45d36b8f0f..0a5c178de8 100644 --- a/examples/root_cause_analysis/README.md +++ b/examples/root_cause_analysis/README.md @@ -124,7 +124,7 @@ add-scores --label=is_root_cause \ `# 7th Stage: Convert from objects back into strings` \ serialize --exclude '^ts_' \ `# 8th Stage: Write results out to CSV file` \ -to-file --filename=./root-cause-binary-output.jsonlines --overwrite +to-file --filename=.tmp/output/root-cause-binary-output.jsonlines --overwrite ``` If successful, the following should be displayed: @@ -177,10 +177,10 @@ Added stage: └─ morpheus.ControlMessagee -> morpheus.ControlMessage Added stage: └─ morpheus.ControlMessage -> morpheus.MessageMeta -Added stage: +Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta Inference rate[Complete]: 473 inf [00:01, 340.43 inf/s] ====Pipeline Complete==== ``` -The output file `root-cause-binary-output.jsonlines` will contain the original kernel log messages with an additional field `is_root_cause`. The value of the new field will be the root cause probability. +The output file `.tmp/output/root-cause-binary-output.jsonlines` will contain the original kernel log messages with an additional field `is_root_cause`. The value of the new field will be the root cause probability. 
From ca6ae8a1b7baf27bcdb646f7f90b943d12fc89d2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 21 Nov 2024 14:58:54 -0800 Subject: [PATCH 12/12] Add output dir --- examples/nlp_si_detection/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 4efdf1df6d..2ed3c8dcb0 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -196,7 +196,7 @@ Inference Rate[Complete]: 93085inf [00:07, 12673.63inf/s] ``` -The output file `nlp_si_detections.jsonlines` will contain the original PCAP messages with the following additional fields added: +The output file `.tmp/output/nlp_si_detections.jsonlines` will contain the original PCAP messages with the following additional fields added: * `address` * `bank_acct` * `credit_card`