Skip to content

Commit

Permalink
Group memory field names with prefix and minor fixes (#27)
Browse files Browse the repository at this point in the history
* group memory field names with `mem_` prefix and minor fixes

* change to drop index on index reset
  • Loading branch information
achew010 authored May 30, 2024
1 parent 25171a0 commit 70cdf71
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 7 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ For example:
- GPTQ-LoRA: 22-44 % token throughput increase on 1 GPU as compared to using Hugging Face BNB QLoRA
- GPTQ-LoRA: Straightforward integration with multiple GPU as compared to using Hugging Face BNB QLoRA

*Hugging Face BNB QLoRA numbers were taken with legacy approaches; we are aware of [this issue](https://github.com/foundation-model-stack/fms-acceleration/issues/10) and will update our benchmarks*.
*The above includes numbers using fusedOps-and-kernels; the actual implementation is coming soon (see below)*.

**This package is in BETA and is under development. Expect breaking changes!**
Expand Down
6 changes: 3 additions & 3 deletions scripts/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
GPU_LOG_USED_MEM_COLUMN_NAME = "memory.used [MiB]"
GPU_LOG_METRIC_SUFFIX = " MiB"
GPU_TABLE = "timestamp,name,index,memory.used"
RESULT_FIELD_RESERVED_GPU_MEM = "nvidia_mem_reserved"
RESULT_FIELD_RESERVED_GPU_MEM = "mem_nvidia_mem_reserved"
RESULT_FIELD_DEVICE_NAME = "gpu_device_name"

HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT = "before_init_mem_gpu"
Expand All @@ -86,8 +86,8 @@
KEYWORD_PEAKED_DELTA = "peaked_delta"
KEYWORD_ALLOC_DELTA = "alloc_delta"
HF_ARG_SKIP_MEMORY_METRIC = "--skip_memory_metrics"
RESULT_FIELD_ALLOCATED_GPU_MEM = "torch_mem_alloc_in_bytes"
RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "peak_torch_mem_alloc_in_bytes"
RESULT_FIELD_ALLOCATED_GPU_MEM = "mem_torch_mem_alloc_in_bytes"
RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "mem_peak_torch_mem_alloc_in_bytes"


def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]:
Expand Down
8 changes: 6 additions & 2 deletions scripts/benchmarks/display_bench_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main(*directories: str, output_filename: str = "results.csv", remove_columns
df = df.loc[df.error_messages.isna()]
except:
pass
df = df.reset_index().drop("output_dir", axis=1)
df = df.reset_index(drop=True).drop("output_dir", axis=1)
df.reindex(sorted(df.columns), axis=1).to_csv(output_filename, index=False)
print("***************** Report Created ******************")
print(f"Total lines: '{len(df)}'")
Expand Down Expand Up @@ -55,4 +55,8 @@ def main(*directories: str, output_filename: str = "results.csv", remove_columns
)

args = parser.parse_args()
main(args.bench_outputs, output_filename=args.result_file, remove_columns=args.remove_columns)
main(
args.bench_outputs,
output_filename=args.result_file,
remove_columns=args.remove_columns,
)
2 changes: 1 addition & 1 deletion scripts/run_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PIP_REQUIREMENTS_FILE=requirements.txt
DRY_RUN=${DRY_RUN:-"false"}
NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"}
NO_OVERWRITE=${NO_OVERWRITE:-"false"}
MEMORY_LOGGING=${MEMORY_LOGGING:-"huggingface"}
MEMORY_LOGGING=${MEMORY_LOGGING:-"all"}

# inputs
NUM_GPUS_MATRIX=${1-"1 2"}
Expand Down

0 comments on commit 70cdf71

Please sign in to comment.