forked from JeffersonLab/hd_utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add scripts for benchmarking amptools fits
- Loading branch information
1 parent
aa69788
commit 06969be
Showing
3 changed files
with
275 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#include <iostream> | ||
#include <iomanip> | ||
#include <stdio.h> | ||
#include <bits/stdc++.h> | ||
#include <string> | ||
#include <sstream> | ||
|
||
// ROOT macro: plot AmpTools benchmark fit speed (likelihood-call rate in Hz)
// versus the number of CPU threads or GPUs used in the fit.
//
// dir: top-level directory holding the benchmark results, one subdirectory
//      per test point ("cpu%03d", "gpuT4%03d", "gpuTitanRTX%03d"), each
//      containing a log/fit.out produced by the benchmark submission scripts.
// Output: benchmark.png in the current working directory.
void plot_benchmark(TString dir = "./") {

	gStyle->SetOptStat(0);

	// initialize list of nCores to plot
	vector<int> numThreadsCPU = {1,2,4,8,16,32,64,96,128};

	// for GPU fits, only add if desired (empty vector disables that series)
	vector<int> numThreadsGPUT4 = {1,2,3,4};
	vector<int> numThreadsGPURTX = {};

	// names of directories containing benchmark results
	vector<TString> types = {"cpu"};
	vector<TGraphErrors*> grBenchmarkScan;
	if(numThreadsGPUT4.size() > 0) types.push_back("gpuT4");
	if(numThreadsGPURTX.size() > 0) types.push_back("gpuTitanRTX");

	TH1F *hBenchmarkScan = new TH1F("hBenchmarkScan","; Number of GPUs or CPUs; Fit speed (Likelihood function call rate [Hz])", 200, 0, 200);
	double maxRate = 0; // largest mean rate seen, used to set the y-axis range

	for(size_t itype=0; itype<types.size(); itype++) {
		vector<int> numThreads = numThreadsCPU;
		if(types[itype] == "gpuT4") numThreads = numThreadsGPUT4;
		if(types[itype] == "gpuTitanRTX") numThreads = numThreadsGPURTX;
		grBenchmarkScan.push_back(new TGraphErrors(numThreads.size()));

		// loop over number of threads in test
		for(size_t ithread=0; ithread<numThreads.size(); ithread++) {

			int nThreads = numThreads[ithread];
			string spath = Form("%s/%s%03d/log/fit.out", dir.Data(), types[itype].Data(), nThreads);
			cout << spath << endl;

			// Parse the fit log: every line of the form
			// "average time per function call: X ms." yields one rate sample.
			std::string read_line;
			ifstream file(spath);
			vector<double> rates; // likelihood call rates [Hz]
			while (std::getline(file, read_line)) {
				TString line = read_line;
				if(!line.Contains("time ")) continue;
				line.ReplaceAll("average time per function call: ","");
				line.ReplaceAll(" ms.","");
				double msPerCall = line.Atof();
				if(msPerCall <= 0) continue; // guard: skip unparsable/zero timings (was a div-by-zero)
				rates.push_back(1000./msPerCall); // ms per call -> calls per second
			}

			if(!rates.empty()) {
				// mean rate
				double parAvg = 0;
				for(double r : rates) parAvg += r;
				parAvg /= rates.size();
				// RMS deviation from the mean, computed directly;
				// the original expanded-square form (v^2 + avg^2 - 2*sqrt(v^2)*avg)
				// was a numerically round-about way to write (v - avg)^2
				double parRms = 0;
				for(double r : rates) parRms += (r - parAvg)*(r - parAvg);
				parRms = sqrt(parRms / rates.size());
				if(parAvg > maxRate) maxRate = parAvg;
				cout<<parAvg<<" "<<parRms<<endl;
				if(parRms < 1e-9) parRms = 0.01; // avoid invisible zero-size error bars
				grBenchmarkScan[itype]->SetPoint(ithread, nThreads, parAvg);
				grBenchmarkScan[itype]->SetPointError(ithread, 0, parRms);
			}
		}
	}

	TCanvas *cc = new TCanvas("cc","cc",800,400);
	auto legend = new TLegend(0.47,0.17,0.9,0.42);

	hBenchmarkScan->SetMaximum(maxRate*2.5);
	hBenchmarkScan->SetMinimum(0.1);
	hBenchmarkScan->Draw();
	vector<TF1*> fit;
	for(size_t itype=0; itype<types.size(); itype++) {
		grBenchmarkScan[itype]->SetMarkerStyle(20);
		grBenchmarkScan[itype]->SetMarkerColor(kBlack+itype);
		grBenchmarkScan[itype]->Draw("same pl");

		// dashed linear reference fit through the origin for the CPU series:
		// ideal scaling would follow this line
		if(itype==0) {
			fit.push_back(new TF1(types[itype],"pol1",1,200));
			fit[itype]->FixParameter(0,0);
			grBenchmarkScan[itype]->Fit(fit[itype],"N","",0.5,24);
			fit[itype]->SetLineColor(kBlack+itype); fit[itype]->SetLineStyle(kDashed);
			fit[itype]->Draw("same");
		}

		if(itype==0)
			legend->AddEntry(grBenchmarkScan[0],"ifarm19 CPU (2 thread/core)","pl");
		if(types[itype] == "gpuT4")
			legend->AddEntry(grBenchmarkScan[itype],"sciml21 T4 GPU","pl");
		if(types[itype] == "gpuTitanRTX")
			legend->AddEntry(grBenchmarkScan[itype],"sciml19 Titan RTX GPU","pl");
	}

	gPad->SetLeftMargin(0.09);
	gPad->SetBottomMargin(0.15);
	gPad->SetTopMargin(0.05);
	gPad->SetRightMargin(0.05);
	gPad->SetLogx(); gPad->SetLogy();
	gPad->SetGridy(); gPad->SetGridx();
	hBenchmarkScan->GetXaxis()->SetTitleSize(0.05);
	hBenchmarkScan->GetYaxis()->SetTitleSize(0.05);
	hBenchmarkScan->GetXaxis()->SetTitleOffset(1.3);
	hBenchmarkScan->GetYaxis()->SetTitleOffset(0.8);

	legend->SetFillColor(0);
	legend->Draw();

	cc->Print("benchmark.png");

	return;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import os | ||
import subprocess | ||
import math | ||
import pwd | ||
from optparse import OptionParser | ||
|
||
########################################################## MAIN ########################################################## | ||
def main(argv): | ||
|
||
# SLURM INFO (see options at https://scicomp.jlab.org/scicomp/slurmJob/slurmInfo) | ||
PARTITION = "ifarm" | ||
CONSTRAINT = "farm19" | ||
TIMELIMIT = "24:00:00" # Max walltime | ||
MyCPUs = [1, 2, 4, 8, 16, 32, 64, 96, 128] # List of CPU cores to use in benchmark fits | ||
|
||
# User provided environment, fit configuration and options | ||
MyEnv = "/work/halld2/home/jrsteven/analysisGluexI/builds/setup_gluex_scanParam.csh" | ||
MyConfig = "/work/halld2/home/jrsteven/forBenchmark/benchmark.cfg" | ||
MyMPIOpt = "--mca btl_openib_allow_ib 1" | ||
MyFitOpt = "-m 100000 -r 5" | ||
MyOutDir = "/volatile/halld/home/" + pwd.getpwuid( os.getuid() )[0] + "/benchmark/" | ||
|
||
# LOOP OVER # OF CORES FOR BENCHMARK | ||
for nCores in MyCPUs: | ||
# nodes used in fit (for every 64 CPUs allow an additional node) | ||
nNodes = nCores/64 + 1 | ||
|
||
# create output directories | ||
MyRunningDir = MyOutDir + "cpu%03d" % nCores | ||
MyLogOutDir = MyRunningDir + "/log" | ||
if not os.path.exists(MyOutDir): | ||
os.makedirs(MyOutDir) | ||
if not os.path.exists(MyRunningDir): | ||
os.makedirs(MyRunningDir) | ||
if not os.path.exists(MyLogOutDir): | ||
os.makedirs(MyLogOutDir) | ||
|
||
# create slurm submission script | ||
slurmOut = open("tempSlurm.txt",'w') | ||
slurmOut.write("#!/bin/csh \n") | ||
slurmOut.write("#SBATCH --nodes=%d \n" % nNodes) | ||
slurmOut.write("#SBATCH --partition=%s \n" % PARTITION) | ||
slurmOut.write("#SBATCH --constraint=%s \n" % CONSTRAINT) | ||
slurmOut.write("#SBATCH --cpus-per-task=1 \n") | ||
slurmOut.write("#SBATCH --ntasks-per-core=1 \n") | ||
slurmOut.write("#SBATCH --threads-per-core=1 \n") | ||
slurmOut.write("#SBATCH --mem=%dGB \n" % nCores) # 1 GB per core | ||
slurmOut.write("#SBATCH --time=%s \n" % TIMELIMIT) | ||
slurmOut.write("#SBATCH --ntasks=%d \n" % (nCores+1)) | ||
|
||
slurmOut.write("#SBATCH --chdir=%s \n" % MyRunningDir) | ||
slurmOut.write("#SBATCH --error=%s/fit.err \n" % (MyLogOutDir)) | ||
slurmOut.write("#SBATCH --output=%s/fit.out \n" % (MyLogOutDir)) | ||
slurmOut.write("#SBATCH --job-name=benchfit_%03d \n\n\n" % nCores) | ||
|
||
# commands to execute during job | ||
slurmOut.write("pwd \n") | ||
slurmOut.write("source %s \n" % MyEnv) | ||
slurmOut.write("mpirun %s fitMPI -c %s %s \n" % (MyMPIOpt, MyConfig, MyFitOpt)) | ||
slurmOut.close() | ||
|
||
# submit individual job | ||
print("Submitting %d core job on %d nodes" % (nCores, nNodes)) | ||
subprocess.call(["sbatch", "tempSlurm.txt"]) | ||
os.remove("tempSlurm.txt") | ||
|
||
|
||
# Script entry point: forward the command-line arguments (excluding the
# program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import os | ||
import subprocess | ||
import math | ||
import pwd | ||
from optparse import OptionParser | ||
|
||
########################################################## MAIN ########################################################## | ||
def main(argv): | ||
|
||
# SLURM INFO (see options at https://scicomp.jlab.org/scicomp/slurmJob/slurmInfo) | ||
PARTITION = "gpu" | ||
GPUTYPE = "TitanRTX" | ||
TIMELIMIT = "24:00:00" # Max walltime | ||
MyGPUs = [1,2,3,4] # List of GPU cards to use in benchmark fits | ||
|
||
# User provided environment, fit configuration and options | ||
MyEnv = "/work/halld2/home/jrsteven/2021-amptools/builds_gpu/setup_gluex_dev.csh" | ||
MyConfig = "/work/halld2/home/jrsteven/forBenchmark/benchmark.cfg" | ||
MyMPIOpt = "--mca btl_openib_allow_ib 1" | ||
MyFitOpt = "-m 100000 -r 5" | ||
MyOutDir = "/volatile/halld/home/" + pwd.getpwuid( os.getuid() )[0] + "/benchmark/" | ||
|
||
# LOOP OVER # OF GPUs FOR BENCHMARK | ||
for nGPUs in MyGPUs: | ||
|
||
# Two types of nodes/GPUs (sciml19 and sciml21), both with 3 each | ||
nNodes = 1 | ||
if GPUTYPE=="T4": | ||
if nGPUs > 8: nNodes=2 | ||
if nGPUs > 16: nNodes=3 | ||
if GPUTYPE=="TitanRTX": | ||
if nGPUs > 4: nNodes=2 | ||
if nGPUs > 8: nNodes=3 | ||
|
||
# create output directories | ||
MyRunningDir = MyOutDir + "gpu%s%03d" % (GPUTYPE,nGPUs) | ||
MyLogOutDir = MyRunningDir + "/log" | ||
if not os.path.exists(MyOutDir): | ||
os.makedirs(MyOutDir) | ||
if not os.path.exists(MyRunningDir): | ||
os.makedirs(MyRunningDir) | ||
if not os.path.exists(MyLogOutDir): | ||
os.makedirs(MyLogOutDir) | ||
|
||
# create slurm submission script | ||
slurmOut = open("tempSlurm.txt",'w') | ||
slurmOut.write("#!/bin/csh \n") | ||
slurmOut.write("#SBATCH --nodes=%d \n" % nNodes) | ||
slurmOut.write("#SBATCH --partition=%s \n" % PARTITION) | ||
slurmOut.write("#SBATCH --gres=gpu:%s:%d \n" % (GPUTYPE,nGPUs)) | ||
slurmOut.write("#SBATCH --cpus-per-task=1 \n") | ||
slurmOut.write("#SBATCH --ntasks-per-core=1 \n") | ||
slurmOut.write("#SBATCH --threads-per-core=1 \n") | ||
slurmOut.write("#SBATCH --mem=20GB \n") # multiplied by nGPUs in slurm? | ||
slurmOut.write("#SBATCH --time=%s \n" % TIMELIMIT) | ||
slurmOut.write("#SBATCH --ntasks=%d \n" % (nGPUs+1)) | ||
|
||
slurmOut.write("#SBATCH --chdir=%s \n" % MyRunningDir) | ||
slurmOut.write("#SBATCH --error=%s/fit.err \n" % (MyLogOutDir)) | ||
slurmOut.write("#SBATCH --output=%s/fit.out \n" % (MyLogOutDir)) | ||
slurmOut.write("#SBATCH --job-name=benchfitgpu_%03d \n\n\n" % nGPUs) | ||
|
||
# commands to execute during job | ||
slurmOut.write("pwd \n") | ||
slurmOut.write("source %s \n" % MyEnv) | ||
slurmOut.write("mpirun %s fitMPI -c %s %s \n" % (MyMPIOpt, MyConfig, MyFitOpt)) | ||
slurmOut.close() | ||
|
||
# submit individual job | ||
print("Submitting %d GPU job on %d %s nodes" % (nGPUs, nNodes, GPUTYPE)) | ||
subprocess.call(["sbatch", "tempSlurm.txt"]) | ||
os.remove("tempSlurm.txt") | ||
|
||
|
||
# Script entry point: forward the command-line arguments (excluding the
# program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])