From 06969be1bfa9960cacc271b09c1b5189fe90cd49 Mon Sep 17 00:00:00 2001
From: Justin Stevens
Date: Fri, 21 Jan 2022 15:26:44 -0500
Subject: [PATCH] add scripts for benchmarking amptools fits

---
 PWA_scripts/benchmark/plot_benchmark.C | 124 +++++++++++++++++++++++++
 PWA_scripts/benchmark/submit.py        |  72 ++++++++++++++
 PWA_scripts/benchmark/submitGPU.py     |  79 ++++++++++++++++
 3 files changed, 275 insertions(+)
 create mode 100644 PWA_scripts/benchmark/plot_benchmark.C
 create mode 100755 PWA_scripts/benchmark/submit.py
 create mode 100755 PWA_scripts/benchmark/submitGPU.py

diff --git a/PWA_scripts/benchmark/plot_benchmark.C b/PWA_scripts/benchmark/plot_benchmark.C
new file mode 100644
index 00000000..f85b7583
--- /dev/null
+++ b/PWA_scripts/benchmark/plot_benchmark.C
@@ -0,0 +1,124 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+
+void plot_benchmark(TString dir = "./") {
+
+  gStyle->SetOptStat(0);
+
+  // initialize list of nCores to plot
+  vector<int> numThreadsCPU = {1,2,4,8,16,32,64,96,128};
+  int numTestCPU = numThreadsCPU.size();
+
+  // for GPU fits, only add if desired
+  vector<int> numThreadsGPUT4 = {1,2,3,4};
+  vector<int> numThreadsGPURTX = {};
+
+  // names of directories containing benchmark results
+  vector<TString> types = {"cpu"};
+  vector<TGraphErrors*> grBenchmarkScan;
+  if(numThreadsGPUT4.size() > 0) types.push_back("gpuT4");
+  if(numThreadsGPURTX.size() > 0) types.push_back("gpuTitanRTX");
+
+  TH1F *hBenchmarkScan = new TH1F("hBenchmarkScan","; Number of GPUs or CPUs; Fit speed (Likelihood function call rate [Hz])", 200, 0, 200);
+  double maxRate = 0;
+
+  for(int itype=0; itype<types.size(); itype++) {
+
+    // thread (or GPU) counts for this hardware type
+    vector<int> numThreads = numThreadsCPU;
+    if(types[itype] == "gpuT4") numThreads = numThreadsGPUT4;
+    if(types[itype] == "gpuTitanRTX") numThreads = numThreadsGPURTX;
+    grBenchmarkScan.push_back(new TGraphErrors(numThreads.size()));
+
+    // loop over number of threads in test
+    for(int ithread=0; ithread<numThreads.size(); ithread++) {
+
+      int nThreads = numThreads[ithread];
+
+      // fit log location follows the directory layout created by the submit scripts
+      TString logFileName = dir + Form("%s%03d/log/fit.out", types[itype].Data(), nThreads);
+      ifstream file(logFileName.Data());
+      string read_line;
+      double parValue = 0, parAvg = 0;
+      vector<double> parSq;
+      int nValues = 0;
+      while (std::getline(file, read_line)) {
+
+        TString line = read_line;
+        if(line.Contains("time ")) {
+          line.ReplaceAll("average time per function call: ","");
+          line.ReplaceAll(" ms.","");
+          parValue = 1./(atof(line)/1000); // convert ms per call to calls per second
+          parAvg += parValue;
+          parSq.push_back(parValue*parValue);
+          nValues++;
+        }
+        else continue;
+
+      }
+
+      if(nValues > 0) {
+        parAvg /= float(nValues);
+        double parRms = 0;
+        for(uint ip=0; ip<parSq.size(); ip++) parRms += parSq[ip];
+        parRms = sqrt(parRms/nValues - parAvg*parAvg);
+        if(parAvg > maxRate) maxRate = parAvg;
+        cout << types[itype] << " " << nThreads << ": rate = " << parAvg << " +/- " << parRms << " Hz" << endl;
+        grBenchmarkScan[itype]->SetPoint(ithread, nThreads, parAvg);
+        grBenchmarkScan[itype]->SetPointError(ithread, 0, parRms);
+      }
+    }
+  }
+
+  TCanvas *cc = new TCanvas("cc","cc",800,400);
+  auto legend = new TLegend(0.47,0.17,0.9,0.42);
+
+  hBenchmarkScan->SetMaximum(maxRate*2.5);
+  hBenchmarkScan->SetMinimum(0.1);
+  hBenchmarkScan->Draw();
+  vector<TF1*> fit;
+  for(int itype=0; itype<types.size(); itype++) {
+    grBenchmarkScan[itype]->SetMarkerStyle(20);
+    grBenchmarkScan[itype]->SetMarkerColor(kBlack+itype);
+    grBenchmarkScan[itype]->Draw("same pl");
+
+    if(itype==0) {
+      fit.push_back(new TF1(types[itype],"pol1",1,200));
+      fit[itype]->FixParameter(0,0);
+      grBenchmarkScan[itype]->Fit(fit[itype],"N","",0.5,24);
+      fit[itype]->SetLineColor(kBlack+itype); fit[itype]->SetLineStyle(kDashed);
+      fit[itype]->Draw("same");
+    }
+
+    if(itype==0)
+      legend->AddEntry(grBenchmarkScan[0],"ifarm19 CPU (2 thread/core)","pl");
+    if(types[itype] == "gpuT4")
+      legend->AddEntry(grBenchmarkScan[itype],"sciml21 T4 GPU","pl");
+    if(types[itype] == "gpuTitanRTX")
+      legend->AddEntry(grBenchmarkScan[itype],"sciml19 Titan RTX GPU","pl");
+  }
+
+  gPad->SetLeftMargin(0.09);
+  gPad->SetBottomMargin(0.15);
+  gPad->SetTopMargin(0.05);
+  gPad->SetRightMargin(0.05);
+  gPad->SetLogx();
+  gPad->SetLogy();
+  gPad->SetGridy(); gPad->SetGridx();
+  hBenchmarkScan->GetXaxis()->SetTitleSize(0.05);
+  hBenchmarkScan->GetYaxis()->SetTitleSize(0.05);
+  hBenchmarkScan->GetXaxis()->SetTitleOffset(1.3);
+  hBenchmarkScan->GetYaxis()->SetTitleOffset(0.8);
+
+  legend->SetFillColor(0);
+  legend->Draw();
+
+  cc->Print("benchmark.png");
+
+  return;
+}
diff --git a/PWA_scripts/benchmark/submit.py b/PWA_scripts/benchmark/submit.py
new file mode 100755
index 00000000..1e0b0a39
--- /dev/null
+++ b/PWA_scripts/benchmark/submit.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import subprocess
+import math
+import pwd
+from optparse import OptionParser
+
+########################################################## MAIN ##########################################################
+def main(argv):
+
+    # SLURM INFO (see options at https://scicomp.jlab.org/scicomp/slurmJob/slurmInfo)
+    PARTITION = "ifarm"
+    CONSTRAINT = "farm19"
+    TIMELIMIT = "24:00:00" # Max walltime
+    MyCPUs = [1, 2, 4, 8, 16, 32, 64, 96, 128] # List of CPU cores to use in benchmark fits
+
+    # User provided environment, fit configuration and options
+    MyEnv = "/work/halld2/home/jrsteven/analysisGluexI/builds/setup_gluex_scanParam.csh"
+    MyConfig = "/work/halld2/home/jrsteven/forBenchmark/benchmark.cfg"
+    MyMPIOpt = "--mca btl_openib_allow_ib 1"
+    MyFitOpt = "-m 100000 -r 5"
+    MyOutDir = "/volatile/halld/home/" + pwd.getpwuid( os.getuid() )[0] + "/benchmark/"
+
+    # LOOP OVER # OF CORES FOR BENCHMARK
+    for nCores in MyCPUs:
+        # nodes used in fit (for every 64 CPUs allow an additional node); integer division
+        nNodes = nCores//64 + 1
+
+        # create output directories
+        MyRunningDir = MyOutDir + "cpu%03d" % nCores
+        MyLogOutDir = MyRunningDir + "/log"
+        if not os.path.exists(MyOutDir):
+            os.makedirs(MyOutDir)
+        if not os.path.exists(MyRunningDir):
+            os.makedirs(MyRunningDir)
+        if not os.path.exists(MyLogOutDir):
+            os.makedirs(MyLogOutDir)
+
+        # create slurm submission script
+        slurmOut = open("tempSlurm.txt",'w')
+        slurmOut.write("#!/bin/csh \n")
+        slurmOut.write("#SBATCH --nodes=%d \n" % nNodes)
+        slurmOut.write("#SBATCH --partition=%s \n" % PARTITION)
+        slurmOut.write("#SBATCH --constraint=%s \n" % CONSTRAINT)
+        slurmOut.write("#SBATCH --cpus-per-task=1 \n")
+        slurmOut.write("#SBATCH --ntasks-per-core=1 \n")
+        slurmOut.write("#SBATCH --threads-per-core=1 \n")
+        slurmOut.write("#SBATCH --mem=%dGB \n" % nCores) # 1 GB per core
+        slurmOut.write("#SBATCH --time=%s \n" % TIMELIMIT)
+        slurmOut.write("#SBATCH --ntasks=%d \n" % (nCores+1)) # one worker rank per core, plus a lead rank for the MPI fit
+
+        slurmOut.write("#SBATCH --chdir=%s \n" % MyRunningDir)
+        slurmOut.write("#SBATCH --error=%s/fit.err \n" % (MyLogOutDir))
+        slurmOut.write("#SBATCH --output=%s/fit.out \n" % (MyLogOutDir))
+        slurmOut.write("#SBATCH --job-name=benchfit_%03d \n\n\n" % nCores)
+
+        # commands to execute during job
+        slurmOut.write("pwd \n")
+        slurmOut.write("source %s \n" % MyEnv)
+        slurmOut.write("mpirun %s fitMPI -c %s %s \n" % (MyMPIOpt, MyConfig, MyFitOpt))
+        slurmOut.close()
+
+        # submit individual job
+        print("Submitting %d core job on %d nodes" % (nCores, nNodes))
+        subprocess.call(["sbatch", "tempSlurm.txt"])
+        os.remove("tempSlurm.txt")
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/PWA_scripts/benchmark/submitGPU.py b/PWA_scripts/benchmark/submitGPU.py
new file mode 100755
index 00000000..9b708c5d
--- /dev/null
+++ b/PWA_scripts/benchmark/submitGPU.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import subprocess
+import math
+import pwd
+from optparse import OptionParser
+
+########################################################## MAIN ##########################################################
+def main(argv):
+
+    # SLURM INFO (see options at https://scicomp.jlab.org/scicomp/slurmJob/slurmInfo)
+    PARTITION = "gpu"
+    GPUTYPE = "TitanRTX"
+    TIMELIMIT = "24:00:00" # Max walltime
+    MyGPUs = [1,2,3,4] # List of GPU cards to use in benchmark fits
+
+    # User provided environment, fit configuration and options
+    MyEnv = "/work/halld2/home/jrsteven/2021-amptools/builds_gpu/setup_gluex_dev.csh"
+    MyConfig = "/work/halld2/home/jrsteven/forBenchmark/benchmark.cfg"
+    MyMPIOpt = "--mca btl_openib_allow_ib 1"
+    MyFitOpt = "-m 100000 -r 5"
+    MyOutDir = "/volatile/halld/home/" + pwd.getpwuid( os.getuid() )[0] + "/benchmark/"
+
+    # LOOP OVER # OF GPUs FOR BENCHMARK
+    for nGPUs in MyGPUs:
+
+        # Two types of GPU nodes, sciml19 (TitanRTX) and sciml21 (T4), with 3 nodes each;
+        # request additional nodes when nGPUs exceeds what a single node provides
+        nNodes = 1
+        if GPUTYPE=="T4":
+            if nGPUs > 8: nNodes=2
+            if nGPUs > 16: nNodes=3
+        if GPUTYPE=="TitanRTX":
+            if nGPUs > 4: nNodes=2
+            if nGPUs > 8: nNodes=3
+
+        # create output directories
+        MyRunningDir = MyOutDir + "gpu%s%03d" % (GPUTYPE,nGPUs)
+        MyLogOutDir = MyRunningDir + "/log"
+        if not os.path.exists(MyOutDir):
+            os.makedirs(MyOutDir)
+        if not os.path.exists(MyRunningDir):
+            os.makedirs(MyRunningDir)
+        if not os.path.exists(MyLogOutDir):
+            os.makedirs(MyLogOutDir)
+
+        # create slurm submission script
+        slurmOut = open("tempSlurm.txt",'w')
+        slurmOut.write("#!/bin/csh \n")
+        slurmOut.write("#SBATCH --nodes=%d \n" % nNodes)
+        slurmOut.write("#SBATCH --partition=%s \n" % PARTITION)
+        slurmOut.write("#SBATCH --gres=gpu:%s:%d \n" % (GPUTYPE,nGPUs))
+        slurmOut.write("#SBATCH --cpus-per-task=1 \n")
+        slurmOut.write("#SBATCH --ntasks-per-core=1 \n")
+        slurmOut.write("#SBATCH --threads-per-core=1 \n")
+        slurmOut.write("#SBATCH --mem=20GB \n") # multiplied by nGPUs in slurm?
+        slurmOut.write("#SBATCH --time=%s \n" % TIMELIMIT)
+        slurmOut.write("#SBATCH --ntasks=%d \n" % (nGPUs+1)) # one worker rank per GPU, plus a lead rank for the MPI fit
+
+        slurmOut.write("#SBATCH --chdir=%s \n" % MyRunningDir)
+        slurmOut.write("#SBATCH --error=%s/fit.err \n" % (MyLogOutDir))
+        slurmOut.write("#SBATCH --output=%s/fit.out \n" % (MyLogOutDir))
+        slurmOut.write("#SBATCH --job-name=benchfitgpu_%03d \n\n\n" % nGPUs)
+
+        # commands to execute during job
+        slurmOut.write("pwd \n")
+        slurmOut.write("source %s \n" % MyEnv)
+        slurmOut.write("mpirun %s fitMPI -c %s %s \n" % (MyMPIOpt, MyConfig, MyFitOpt))
+        slurmOut.close()
+
+        # submit individual job
+        print("Submitting %d GPU job on %d %s node(s)" % (nGPUs, nNodes, GPUTYPE))
+        subprocess.call(["sbatch", "tempSlurm.txt"])
+        os.remove("tempSlurm.txt")
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
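Usage note (not part of the patch): the intended workflow is to run one of the submit scripts, let the SLURM benchmark jobs finish, and then point plot_benchmark.C at the output area so it can parse the "average time per function call" lines from each fit.out log. A minimal sketch of that sequence, assuming the default MyOutDir from submit.py and a ROOT installation in the environment; the driver script itself is hypothetical:

    #!/usr/bin/env python
    # Hypothetical end-to-end driver; it only chains together the pieces added in this patch.
    import os, pwd, subprocess

    # same output area that submit.py / submitGPU.py write into
    outdir = "/volatile/halld/home/" + pwd.getpwuid(os.getuid())[0] + "/benchmark/"

    # queue one CPU benchmark fit per core count (use submitGPU.py for the GPU scan)
    subprocess.call(["./submit.py"])

    # ... wait for the SLURM jobs to finish (e.g. monitor with squeue) ...

    # parse the cpu*/log/fit.out files under outdir and produce benchmark.png
    subprocess.call(["root", "-l", "-b", "-q", 'plot_benchmark.C("%s")' % outdir])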