Group memory field names with prefix and minor fixes (#27)

* group memory field names with prefix and minor fixes
* change to drop index on index reset
foundation-model-stack · May 30, 2024 · 70cdf71 · 70cdf71
1 parent 25171a0
commit 70cdf71
Show file tree

Hide file tree

Showing 4 changed files with 10 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -21,7 +21,6 @@ For example:
 - GPTQ-LoRA: 22-44 % token throughput increase on 1 GPU as compared to using Hugging Face BNB QLoRA 
 - GPTQ-LoRA: Straightforward integration with multiple GPU as compared to using Hugging Face BNB QLoRA
 
-*Huggingface BNB QLoRA numbers taken with legacy approaches, but we are aware of [this issue](https://github.com/foundation-model-stack/fms-acceleration/issues/10) and will update our benches*.
 *The above includes numbers using fusedOps-and-kernels and actual impl coming soon, see below*.
 
 **This package is in BETA and is under development. Expect breaking changes!**

diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py
@@ -77,7 +77,7 @@
 GPU_LOG_USED_MEM_COLUMN_NAME = "memory.used [MiB]"
 GPU_LOG_METRIC_SUFFIX = " MiB"
 GPU_TABLE = "timestamp,name,index,memory.used"
-RESULT_FIELD_RESERVED_GPU_MEM = "nvidia_mem_reserved"
+RESULT_FIELD_RESERVED_GPU_MEM = "mem_nvidia_mem_reserved"
 RESULT_FIELD_DEVICE_NAME = "gpu_device_name"
 
 HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT = "before_init_mem_gpu"
@@ -86,8 +86,8 @@
 KEYWORD_PEAKED_DELTA = "peaked_delta"
 KEYWORD_ALLOC_DELTA = "alloc_delta"
 HF_ARG_SKIP_MEMORY_METRIC = "--skip_memory_metrics"
-RESULT_FIELD_ALLOCATED_GPU_MEM = "torch_mem_alloc_in_bytes"
-RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "peak_torch_mem_alloc_in_bytes"
+RESULT_FIELD_ALLOCATED_GPU_MEM = "mem_torch_mem_alloc_in_bytes"
+RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "mem_peak_torch_mem_alloc_in_bytes"
 
 
 def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]:

diff --git a/scripts/benchmarks/display_bench_results.py b/scripts/benchmarks/display_bench_results.py
@@ -22,7 +22,7 @@ def main(*directories: str, output_filename: str = "results.csv", remove_columns
         df = df.loc[df.error_messages.isna()]
     except:
         pass
-    df = df.reset_index().drop("output_dir", axis=1)
+    df = df.reset_index(drop=True).drop("output_dir", axis=1)
     df.reindex(sorted(df.columns), axis=1).to_csv(output_filename, index=False)
     print("***************** Report Created ******************")
     print(f"Total lines: '{len(df)}'")
@@ -55,4 +55,8 @@ def main(*directories: str, output_filename: str = "results.csv", remove_columns
     )
 
     args = parser.parse_args()
-    main(args.bench_outputs, output_filename=args.result_file, remove_columns=args.remove_columns)
+    main(
+        args.bench_outputs,
+        output_filename=args.result_file,
+        remove_columns=args.remove_columns,
+    )
diff --git a/scripts/run_benchmarks.sh b/scripts/run_benchmarks.sh
@@ -38,7 +38,7 @@ PIP_REQUIREMENTS_FILE=requirements.txt
 DRY_RUN=${DRY_RUN:-"false"}
 NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"}
 NO_OVERWRITE=${NO_OVERWRITE:-"false"}
-MEMORY_LOGGING=${MEMORY_LOGGING:-"huggingface"}
+MEMORY_LOGGING=${MEMORY_LOGGING:-"all"}
 
 # inputs
 NUM_GPUS_MATRIX=${1-"1 2"}