From d40f311e10d0b52066fd884996c5ee0d893858b5 Mon Sep 17 00:00:00 2001 From: Kento Kaneko Date: Fri, 5 Apr 2024 12:36:32 -0400 Subject: [PATCH] Apply breduce to reduce snapshot projection time (#217) --- .github/workflows/integ.yml | 2 +- .github/workflows/integ_t.yaml | 2 +- .github/workflows/unit.yml | 2 +- .github/workflows/unit_rbf.yaml | 2 +- .github/workflows/unit_rf.yaml | 2 +- bin/makerom | 2 +- bin/setup | 2 +- code/MOR | 4 +- code/batch.f | 1 + code/ip.f | 135 ++++++++++++++++++++++++++++++++ code/makefile_usr.inc | 1 + code/pod.f | 83 ++++++++++++++++---- code/rom.f | 20 ++--- tests/unit.f | 2 +- 14 files changed, 221 insertions(+), 39 deletions(-) create mode 100644 code/ip.f diff --git a/.github/workflows/integ.yml b/.github/workflows/integ.yml index 661ce6c3..4f017f42 100644 --- a/.github/workflows/integ.yml +++ b/.github/workflows/integ.yml @@ -10,7 +10,7 @@ jobs: test: [ CYL ] env: MOR_DIR: /home/runner/work/NekROM/NekROM - USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o" + USR: "ana.o aux.o batch.o conv.o dump.o ei.o filter.o ip.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o" steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/integ_t.yaml b/.github/workflows/integ_t.yaml index 4a333caa..1177ce2d 100644 --- a/.github/workflows/integ_t.yaml +++ b/.github/workflows/integ_t.yaml @@ -10,7 +10,7 @@ jobs: test: [ ANN ] env: MOR_DIR: /home/runner/work/NekROM/NekROM - USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o" + USR: "ana.o aux.o batch.o conv.o dump.o ei.o filter.o ip.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o" steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index a4096dd5..352e7873 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -11,7 +11,7 @@ jobs: ips: [L2, H10] env: MOR_DIR: /home/runner/work/NekROM/NekROM - USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o" + USR: "ana.o aux.o batch.o conv.o dump.o ei.o filter.o ip.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o" steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/unit_rbf.yaml b/.github/workflows/unit_rbf.yaml index ffb8b017..d20fb008 100644 --- a/.github/workflows/unit_rbf.yaml +++ b/.github/workflows/unit_rbf.yaml @@ -10,7 +10,7 @@ jobs: test: [RBF_WT] env: MOR_DIR: /home/runner/work/NekROM/NekROM - USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o" + USR: "ana.o aux.o batch.o conv.o dump.o ei.o filter.o ip.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o" steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/unit_rf.yaml b/.github/workflows/unit_rf.yaml index 5677e8bb..30eb5d65 100644 --- a/.github/workflows/unit_rf.yaml +++ b/.github/workflows/unit_rf.yaml @@ -10,7 +10,7 @@ jobs: test: [rf] env: MOR_DIR: /home/runner/work/NekROM/NekROM - USR: "ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o batch.o" + USR: "ana.o aux.o batch.o conv.o dump.o ei.o filter.o ip.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o tensor.o riesz.o" steps: - uses: actions/checkout@v2 diff --git a/bin/makerom b/bin/makerom index bafb1e08..7728f849 100755 --- a/bin/makerom +++ b/bin/makerom @@ -1,4 +1,4 @@ #!/bin/bash $MOR_DIR/bin/linkm -USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o rk.o legacy.o tensor.o mpar.o riesz.o batch.o" makenek $1 +USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o rk.o legacy.o tensor.o mpar.o riesz.o batch.o ip.o" makenek $1 diff --git a/bin/setup b/bin/setup index 49fb40dc..5c7f034f 100755 --- a/bin/setup +++ b/bin/setup @@ -2,4 +2,4 @@ export MOR_DIR=$(cd ..; pwd -P) export PATH=$MOR_DIR/bin:$PATH -export USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o riesz.o batch.o" +export USR="$USR ana.o aux.o conv.o dump.o ei.o filter.o lapack.o pod.o qoi.o rom.o read.o time.o const.o unit.o rk.o legacy.o mpar.o riesz.o batch.o ip.o" diff --git a/code/MOR b/code/MOR index dd857130..44ee0ec4 100644 --- a/code/MOR +++ b/code/MOR @@ -20,8 +20,8 @@ c parameters and common blocks for MOR $ pic(lx2*ly2*lz2*lelm), $ tic(lx1*ly1*lz1*lelm) - common /morcoef/ uk((lub+1)*lsu),tk((ltb+1)*lst),timek(lcs), - $ edk((lub+1)*lsu) + common /morcoef/ uk((lub+1)*(lsu+1)),tk((ltb+1)*(lst+1)), + $ timek(lcs),edk((lub+1)*(lsu+1)) common /morcoef2/ ukp((lub+1)*lsu),tkp((ltb+1)*lst) common /morusnap/ us0(lx1*ly1*lz1*lelm,ldim,lsu) diff --git a/code/batch.f b/code/batch.f index f973df61..8db39090 100644 --- a/code/batch.f +++ b/code/batch.f @@ -10,6 +10,7 @@ subroutine breduce(a,n,m) real a(n) + if (m.le.0) return if (m.gt.lbat) m = lbat i=1 diff --git a/code/ip.f b/code/ip.f new file mode 100644 index 00000000..a527595b --- /dev/null +++ b/code/ip.f @@ -0,0 +1,135 @@ +c----------------------------------------------------------------------- + subroutine iop(uu,u,imesh) + + include 'SIZE' + include 'MASS' + + parameter (lt=lx1*ly1*lz1*lelt) + + real uu(lt,1),u(lt,1) + + if (imesh.eq.1) then + n=lx1*ly1*lz1*nelv + do idim=1,ndim + call copy(uu(1,idim),u(1,idim),n) + enddo + else + n=lx1*ly1*lz1*nelt + call copy(uu,u,n) + endif + + return + end +c----------------------------------------------------------------------- + subroutine bop(bu,u,imesh) + + include 'SIZE' + include 'MASS' + + parameter (lt=lx1*ly1*lz1*lelt) + + real bu(lt,1),u(lt,1) + + if (imesh.eq.1) then + n=lx1*ly1*lz1*nelv + do idim=1,ndim + call col3(bu(1,idim),u(1,idim),bm1,n) + enddo + else + n=lx1*ly1*lz1*nelt + call col3(bu,u,bm1,n) + endif + + return + end +c----------------------------------------------------------------------- + subroutine aop(au,u,af,imesh) + + include 'SIZE' + include 'LMOR' + + parameter (lt=lx1*ly1*lz1*lelt) + + common /morid/ ones(lx1*ly1*lz1*lelm),zeros(lx1*ly1*lz1*lelm) + + real au(lt,1),u(lt,1),af(1) + + if (imesh.eq.1) then + do idim=1,ndim + call axhelm(au(1,idim),u(1,idim),af,zeros,imesh,idim) + enddo + else + call axhelm(au,u,af,zeros,imesh,1) + endif + + return + end +c----------------------------------------------------------------------- + subroutine hop(hu,u,af,bf,imesh) + + include 'SIZE' + + parameter (lt=lx1*ly1*lz1*lelt) + + real hu(lt,1),u(lt,1),af(1),bf(1) + + if (imesh.eq.1) then + do idim=1,ndim + call axhelm(hu(1,idim),u(1,idim),af,bf,imesh,idim) + enddo + else + call axhelm(hu,u,af,bf,imesh,1) + endif + + return + end +c----------------------------------------------------------------------- + subroutine uip(res,u,v,nv,itype,mdim,nbat,wk,af,bf) + + ! returns inner-product of u and v based on itype + ! res: inner-product results i.e., entries of |u(1)v(1,i)| + ! itype: 0 = discrete L2, 1 = L2, 2 = H10, 3 = H1 + ! mdim: 1 = thermal, ndim = velocity + ! nbat: number of elements in a batch for gop + ! wk: work array + ! af (itype.gt.1): property fields for aop + ! bf (itype.gt.2): property fields for hop + + include 'SIZE' + include 'LMOR' + + parameter (lt=lx1*ly1*lz1*lelt) + + real res(nv),u(lt,1),v(lt,mdim,nv),wk(lt,ldim),af(lt),bf(lt) + + imesh=1 + if (mdim.eq.1) imesh=2 + + if (imesh.eq.1) then + n=lx1*ly1*lz1*nelv + else + n=lx1*ly1*lz1*nelt + endif + + if (itype.eq.0) then + call iop(wk,u,imesh) + else if (itype.eq.1) then + call bop(wk,u,imesh) + else if (itype.eq.2) then + call aop(wk,u,af,imesh) + else if (itype.eq.3) then + call hop(wk,u,af,bf,imesh) + endif + + do j=1,nv + res(j)=0. + do idim=1,mdim + res(j)=res(j)+vlsc2(wk(1,idim),v(1,idim,j),n) + enddo + enddo + + call breduce(res,nv,nbat) + + return + end +c----------------------------------------------------------------------- diff --git a/code/makefile_usr.inc b/code/makefile_usr.inc index 7b593f66..56ddab10 100644 --- a/code/makefile_usr.inc +++ b/code/makefile_usr.inc @@ -6,6 +6,7 @@ $(OBJDIR)/const.o :$(MOR_DIR)/code/const.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ $(OBJDIR)/dump.o :$(MOR_DIR)/code/dump.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ $(OBJDIR)/ei.o :$(MOR_DIR)/code/ei.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ $(OBJDIR)/filter.o :$(MOR_DIR)/code/filter.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ +$(OBJDIR)/ip.o :$(MOR_DIR)/code/ip.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ $(OBJDIR)/lapack.o :$(MOR_DIR)/code/lapack.f MOR LMOR; $(FC) -c $(FL0) $< -o $@ $(OBJDIR)/legacy.o :$(MOR_DIR)/code/legacy.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ $(OBJDIR)/pod.o :$(MOR_DIR)/code/pod.f MOR LMOR; $(FC) -c $(FL2) $< -o $@ diff --git a/code/pod.f b/code/pod.f index 58cbb8cb..9d9c1ec4 100644 --- a/code/pod.f +++ b/code/pod.f @@ -32,7 +32,13 @@ subroutine setbases call opcopy(ub(1,ib),vb(1,ib),wb(1,ib), $ uvwb(1,1,ib),uvwb(1,2,ib),uvwb(1,ldim,ib)) enddo - if (.not.ifcomb.and.ifpb) call vnorm(ub,vb,wb) + if (.not.ifcomb.and.ifpb) then + call vnorm(ub,vb,wb) + do ib=1,nb + call opcopy(uvwb(1,1,ib),uvwb(1,2,ib),uvwb(1,ldim,ib) + $ ,ub(1,ib),vb(1,ib),wb(1,ib)) + enddo + endif else call opcopy(ub,vb,wb,uic,vic,wic) endif @@ -130,6 +136,65 @@ subroutine ps2k(ck,ux,uub) ! implement read here endif + return + end +c----------------------------------------------------------------------- + subroutine p2k(ck,usnap0,sb,mdim,wk) + + ! set snapshot coefficients for a given vector basis + + ! ck := coefficients + ! usnap0 := snapshots (assume 0th mode is subtracted) + ! sb := basis functions + ! mdim := 1 -> scalar, ndim -> vector + + include 'SIZE' + include 'TOTAL' + include 'MOR' + + parameter (lt=lx1*ly1*lz1*lelt) + + common /scrgg/ uu(lt),vv(lt),ww(lt) + + real ck(0:nb,1),usnap0(lt,mdim,ls), + $ sb(lt,mdim,0:nb),wk(ns) + + n=lx1*ly1*lz1*nelt + + if (rmode.eq.'ALL'.or.rmode.eq.'OFF'.or.rmode.eq.'AEQ') then + if (ips.eq.'L2 ') then + itype=1 + else if (ips.eq.'H10') then + itype=2 + call copy(uu,ones,n) + else if (ips.eq.'HLM') then + itype=3 + r1=1./ad_re + r2=ad_beta(1,3)/ad_dt + call cfill(uu,r1,n) + call cfill(vv,r2,n) + endif + + do i=1,ns+1 + ck(0,i)=1. + enddo + + do ib=1,nb + call uip(ck(ib,ns+1),sb(1,1,ib),sb(1,1,ib),1, + $ itype,mdim,0,fldtmp,uu,vv) + enddo + call breduce(ck(1,ns+1),nb,nbat) + call invcol1(ck(1,ns+1),nb) + + do ib=1,nb + call uip(wk,sb(1,1,ib),usnap0(1,1,1),ns, + $ itype,mdim,nbat,fldtmp,uu,vv) + do i=1,ns + ck(ib,i)=wk(i)*ck(ib,ns+1) + enddo + enddo + endif + return end c----------------------------------------------------------------------- @@ -155,19 +220,6 @@ subroutine pv2k(ck,usnap,uub,vvb,wwb) n=lx1*ly1*lz1*nelt if (rmode.eq.'ALL'.or.rmode.eq.'OFF'.or.rmode.eq.'AEQ') then -c if (ips.eq.'H10') then -c do j=1,ns -c call axhelm(uu,usnap(1,1,j),ones,zeros,1,1) -c call axhelm(vv,usnap(1,2,j),ones,zeros,1,2) -c if (ldim.eq.3) -c $ call axhelm(ww,usnap(1,ldim,j),ones,zeros,1,3) -c ck(0,j)=1. -c do i=1,nb -c ck(i,j)=glsc2(uu,uub(1,i),n)+glsc2(vv,vvb(1,i),n) -c if (ldim.eq.3) ck(i,j)=ck(i,j)+glsc2(ww,wwb(1,i),n) -c enddo -c enddo -c else do i=1,ns if (nio.eq.0) write (6,*) 'pv2k: ',i,'/',ns nio=-1 @@ -175,9 +227,6 @@ subroutine pv2k(ck,usnap,uub,vvb,wwb) $ uub,vvb,wwb) nio=nid enddo -c endif - else - ! implement read here endif return diff --git a/code/rom.f b/code/rom.f index 2e5c6a30..2dab1820 100644 --- a/code/rom.f +++ b/code/rom.f @@ -590,9 +590,9 @@ subroutine setmisc call nekgsync proj_time=dnekclock() - if (ifpod(1)) call pv2k(uk,us0,ub,vb,wb) - if (ifpod(2)) call ps2k(tk,ts0(1,1,1),tb(1,0,1)) - if (ifedvs) call ps2k(edk,ts0(1,1,4),tb(1,0,4)) + if (ifpod(1)) call p2k(uk,us0,uvwb,ndim,ukp) + if (ifpod(2)) call p2k(tk,ts0(1,1,1),tb(1,0,1),1,tkp) + if (ifedvs) call p2k(edk,ts0(1,1,4),tb(1,0,4),1,ukp) call nekgsync if (nio.eq.0) write (6,*) 'proj_time:',dnekclock()-proj_time @@ -1336,15 +1336,11 @@ subroutine setc(cl,fname) call cc(cu,1) endif endif - do i=1,nb - if (ifield.eq.1) then - rtmp1(i,1)=op_vlsc2_wt(ub(1,i),vb(1,i),wb(1,i), - $ cu(1,1),cu(1,2),cu(1,ldim),ones) - else - rtmp1(i,1)=vlsc2(tb(1,i,1),cu,n) - endif - enddo - call breduce(rtmp1,nb,nbat) + if (ifield.eq.1) then + call uip(rtmp1,cu,uvwb(1,1,1),nb,0,ndim,nbat,fldtmp) + else + call uip(rtmp1,cu,tb(1,1,1),nb,0,1,nbat,fldtmp) + endif do i=1,nb call setc_local(cl,rtmp1(i,1), $ ic1,ic2,jc1,jc2,kc1,kc2,i,j,k) diff --git a/tests/unit.f b/tests/unit.f index 733e010a..bd0ab42e 100644 --- a/tests/unit.f +++ b/tests/unit.f @@ -524,7 +524,7 @@ subroutine cp_unit(iflag) if (nio.eq.0) write (6,*) 'edif',edif,s1,s2 - if (edif.gt.9.e-11) iexit=iexit+8 + if (edif.gt.5.e-10) iexit=iexit+8 call exitm(iexit)