Skip to content

Commit

Permalink
Speedup gengram and setc by batching the inner-product reduction (#214)
Browse files Browse the repository at this point in the history
  • Loading branch information
kent0 authored Mar 25, 2024
1 parent 4c172ff commit 0c56bdd
Show file tree
Hide file tree
Showing 16 changed files with 167 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integ.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
test: [ CYL ]
env:
MOR_DIR: /home/runner/work/NekROM/NekROM
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o"
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/integ_t.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
test: [ ANN ]
env:
MOR_DIR: /home/runner/work/NekROM/NekROM
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o"
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
ips: [L2, H10]
env:
MOR_DIR: /home/runner/work/NekROM/NekROM
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o"
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit_rbf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
test: [RBF_WT]
env:
MOR_DIR: /home/runner/work/NekROM/NekROM
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o"
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit_rf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
test: [rf]
env:
MOR_DIR: /home/runner/work/NekROM/NekROM
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o"
USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion bin/makerom
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

$MOR_DIR/bin/linkm
USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o rk.o legacy.o tensor.o mpar.o riesz.o" makenek $1
USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o rk.o legacy.o tensor.o mpar.o riesz.o batch.o" makenek $1
2 changes: 1 addition & 1 deletion bin/setup
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

export MOR_DIR=$(cd ..; pwd -P)
export PATH=$MOR_DIR/bin:$PATH
export USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o riesz.o"
export USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o riesz.o batch.o"
2 changes: 1 addition & 1 deletion code/MOR
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ c parameters and common blocks for MOR

integer ad_nsteps,ad_iostep,ad_step,ad_qstep
common /moripar/ ad_nsteps,ad_iostep,ad_step,ad_qstep,inus,
$ navg_step,mb,nb,nbo,nplay,nintp,iaug
$ navg_step,mb,nb,nbo,nplay,nintp,iaug,nbat

common /morivars/ i0,j0,k0,i1,j1,k1,nns,ns,nskip,navg,ncloc,npr,
$ npart,isolve,ic1,ic2,jc1,jc2,kc1,kc2,idirf
Expand Down
97 changes: 49 additions & 48 deletions code/MORDICT
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ c Note: Keys have to be in capital letters
c
integer MORDICT_NKEYS

parameter (MORDICT_NKEYS = 60)
parameter (MORDICT_NKEYS = 61)

character*132 mordictkey(MORDICT_NKEYS)
data
Expand All @@ -21,50 +21,51 @@ c
& mordictkey(11) / 'GENERAL:DECOUPLED' /
& mordictkey(12) / 'GENERAL:CFLOW' /
& mordictkey(13) / 'GENERAL:EDDY_VIS' /
& mordictkey(14) / 'POD' /
& mordictkey(15) / 'POD:TYPE' /
& mordictkey(16) / 'POD:MODE0' /
& mordictkey(17) / 'POD:COMBINED' /
& mordictkey(18) / 'POD:RATIO' /
& mordictkey(19) / 'POD:AUGMENT' /
& mordictkey(20) / 'QOI' /
& mordictkey(21) / 'QOI:FREQ' /
& mordictkey(22) / 'QOI:TKE' /
& mordictkey(23) / 'QOI:DRAG' /
& mordictkey(24) / 'QOI:NU' /
& mordictkey(25) / 'QOI:NINTP' /
& mordictkey(26) / 'COPT' /
& mordictkey(27) / 'COPT:MODE' /
& mordictkey(28) / 'COPT:FIELD' /
& mordictkey(29) / 'COPT:BARRIER' /
& mordictkey(30) / 'COPT:BOXTOL' /
& mordictkey(31) / 'COPT:VPAR0' /
& mordictkey(32) / 'COPT:VNLOOP' /
& mordictkey(33) / 'COPT:TPAR0' /
& mordictkey(34) / 'COPT:TNLOOP' /
& mordictkey(35) / 'FAST' /
& mordictkey(36) / 'FAST:CEVAL' /
& mordictkey(37) / 'FAST:HEVAL' /
& mordictkey(38) / 'FORCING' /
& mordictkey(39) / 'FORCING:BODY' /
& mordictkey(40) / 'FORCING:SOURCE' /
& mordictkey(41) / 'FORCING:BUOYANCY' /
& mordictkey(42) / 'FILTER' /
& mordictkey(43) / 'FILTER:LOCATION' /
& mordictkey(44) / 'FILTER:TYPE' /
& mordictkey(45) / 'FILTER:MODES' /
& mordictkey(46) / 'FILTER:RADIUS' /
& mordictkey(47) / 'BUOYANCY' /
& mordictkey(48) / 'BUOYANCY:MAGNITUDE' /
& mordictkey(49) / 'BUOYANCY:ANGLE' /
& mordictkey(50) / 'BUOYANCY:GX' /
& mordictkey(51) / 'BUOYANCY:GY' /
& mordictkey(52) / 'BUOYANCY:GZ' /
& mordictkey(53) / 'EI' /
& mordictkey(54) / 'EI:MODE' /
& mordictkey(55) / 'EI:EQN' /
& mordictkey(56) / 'TENDEC' /
& mordictkey(57) / 'TENDEC:MODE' /
& mordictkey(58) / 'TENDEC:RANK' /
& mordictkey(59) / 'TENDEC:CORE' /
& mordictkey(60) / 'TENDEC:SKEW' /
& mordictkey(14) / 'GENERAL:NBAT' /
& mordictkey(15) / 'POD' /
& mordictkey(16) / 'POD:TYPE' /
& mordictkey(17) / 'POD:MODE0' /
& mordictkey(18) / 'POD:COMBINED' /
& mordictkey(19) / 'POD:RATIO' /
& mordictkey(20) / 'POD:AUGMENT' /
& mordictkey(21) / 'QOI' /
& mordictkey(22) / 'QOI:FREQ' /
& mordictkey(23) / 'QOI:TKE' /
& mordictkey(24) / 'QOI:DRAG' /
& mordictkey(25) / 'QOI:NU' /
& mordictkey(26) / 'QOI:NINTP' /
& mordictkey(27) / 'COPT' /
& mordictkey(28) / 'COPT:MODE' /
& mordictkey(29) / 'COPT:FIELD' /
& mordictkey(30) / 'COPT:BARRIER' /
& mordictkey(31) / 'COPT:BOXTOL' /
& mordictkey(32) / 'COPT:VPAR0' /
& mordictkey(33) / 'COPT:VNLOOP' /
& mordictkey(34) / 'COPT:TPAR0' /
& mordictkey(35) / 'COPT:TNLOOP' /
& mordictkey(36) / 'FAST' /
& mordictkey(37) / 'FAST:CEVAL' /
& mordictkey(38) / 'FAST:HEVAL' /
& mordictkey(39) / 'FORCING' /
& mordictkey(40) / 'FORCING:BODY' /
& mordictkey(41) / 'FORCING:SOURCE' /
& mordictkey(42) / 'FORCING:BUOYANCY' /
& mordictkey(43) / 'FILTER' /
& mordictkey(44) / 'FILTER:LOCATION' /
& mordictkey(45) / 'FILTER:TYPE' /
& mordictkey(46) / 'FILTER:MODES' /
& mordictkey(47) / 'FILTER:RADIUS' /
& mordictkey(48) / 'BUOYANCY' /
& mordictkey(49) / 'BUOYANCY:MAGNITUDE' /
& mordictkey(50) / 'BUOYANCY:ANGLE' /
& mordictkey(51) / 'BUOYANCY:GX' /
& mordictkey(52) / 'BUOYANCY:GY' /
& mordictkey(53) / 'BUOYANCY:GZ' /
& mordictkey(54) / 'EI' /
& mordictkey(55) / 'EI:MODE' /
& mordictkey(56) / 'EI:EQN' /
& mordictkey(57) / 'TENDEC' /
& mordictkey(58) / 'TENDEC:MODE' /
& mordictkey(59) / 'TENDEC:RANK' /
& mordictkey(60) / 'TENDEC:CORE' /
& mordictkey(61) / 'TENDEC:SKEW' /
62 changes: 62 additions & 0 deletions code/batch.f
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
c-----------------------------------------------------------------------
      subroutine breduce(a,n,m)

c     Global sum-reduction of the vector a(1:n), issued as a sequence
c     of gop calls on consecutive batches of at most m entries.
c
c     a  vector to reduce; each batch a(i:i+len-1) is handed to gop
c        (gop is expected to leave the reduced values in a)
c     n  number of entries of a to reduce; n <= 0 does nothing
c     m  requested batch size; it is only read, never modified
c
c     The effective batch size is capped at lbat, the length of the
c     work array w handed to gop.  A non-positive m selects the
c     largest batch (lbat) instead of stalling or walking backwards
c     through a.

      include 'SIZE'
      include 'LMOR'

      common /workbr/ v(lbat),w(lbat)

      real a(n)

c     Clamp into a local: writing to m would change the caller's
c     variable and is undefined when the actual argument is a
c     constant or an expression.
      mstep = min(m,lbat)
      if (mstep.lt.1) mstep = lbat

      i=1
      do while (i.le.n)
c        the last batch may be shorter than mstep
         nchunk = min(mstep,n-i+1)
c        operator string blank-padded to 3 characters; Nek5000's gop
c        declares op as character*3 (confirm against comm_mpi.f)
         call gop(a(i),w,'+  ',nchunk)
         i=i+mstep
      enddo

      return
      end
c-----------------------------------------------------------------------
      subroutine brprofile

c     Profile breduce: for every vector length n = 1,...,lbat (with
c     the batch size set to n) time ntrial calls with dnekclock and
c     print n, the minimum, mean and maximum time, the standard
c     deviation, and np.  The line is written to unit 6 only where
c     nio equals 0.

      include 'SIZE'
      include 'PARALLEL'
      include 'LMOR'

      common /workbr/ v(lbat),w(lbat)

      ntrial = 1024

      do n=1,lbat
         time_min = 1.0e+10
         time_max = 0.0
         time_avg = 0.0
         time_avg2 = 0.0
         do i=1,ntrial
c           Refill the input before every trial, outside the timed
c           region.  breduce's sum reduction is expected to overwrite
c           v, so reusing it would scale the values by roughly the
c           rank count on each trial until they overflow.
            do ir=1,n
               v(ir) = rand()
            enddo
            start_time=dnekclock()
            call breduce(v,n,n)
            end_time=dnekclock()
            time = end_time - start_time
            time_min = min(time_min,time)
            time_max = max(time_max,time)
            time_avg = time_avg + time
            time_avg2 = time_avg2 + time*time
         enddo
         time_avg = time_avg / ntrial
         time_avg2 = time_avg2 / ntrial
c        E[t^2]-E[t]^2 can round to a tiny negative number when the
c        timings are nearly identical; clamp so sqrt cannot give NaN
         time_var = max(time_avg2 - time_avg*time_avg,0.0)
         time_std = sqrt(time_var)
         if (nio.eq.0) then
            write (6,*) n, time_min, time_avg, time_max, time_std, np
         endif
      enddo

      return
      end
c-----------------------------------------------------------------------
1 change: 1 addition & 0 deletions code/makefile_usr.inc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
$(OBJDIR)/ana.o :$(MOR_DIR)/code/ana.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
$(OBJDIR)/aux.o :$(MOR_DIR)/code/aux.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
$(OBJDIR)/batch.o :$(MOR_DIR)/code/batch.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
$(OBJDIR)/conv.o :$(MOR_DIR)/code/conv.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
$(OBJDIR)/const.o :$(MOR_DIR)/code/const.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
$(OBJDIR)/dump.o :$(MOR_DIR)/code/dump.f MOR LMOR; $(FC) -c $(FL2) $< -o $@
Expand Down
4 changes: 4 additions & 0 deletions code/mpar.f
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ subroutine mpar_read(ierr)
call finiparser_getbool(i_out,'general:setbases',ifnd)
if (ifnd.eq.1) ifsetbases=i_out.eq.1

call finiparser_getdbl(d_out,'general:nbat',ifnd)
if (ifnd.eq.1) nbat=nint(d_out)

ibuoy=0

call finiparser_getdbl(d_out,'buoyancy:magnitude',ifnd)
Expand Down Expand Up @@ -534,6 +537,7 @@ subroutine bcastmpar
call bcast(tbarrseq,isize)
call bcast(nintp,isize)
call bcast(iaug,isize)
call bcast(nbat,isize)

! reals

Expand Down
Loading

0 comments on commit 0c56bdd

Please sign in to comment.