From 83f44b2e72b204dec1f32fd5ca92b01a866b5d77 Mon Sep 17 00:00:00 2001
From: William Xu <chengzhu.xu@oregonstate.edu>
Date: Wed, 23 Oct 2024 20:14:05 -0300
Subject: [PATCH 1/3] Inline harmonic analysis

Important bug fix:
    1) The Cholesky decomposition was operating on entries below
the main diagonal of FtF, whereas in the accumulator of FtF, only
entries along and above the main diagonal were calculated. In this
revision, I modified HA_accum_FtF so that entries below the main
diagonal are accumulated instead.
    2) In the accumulator of FtSSH, the first entry for the mean
(zero frequency) is moved out of the loop over different tidal
constituents, so that it is not accumulated multiple times within
a single time step.
---
 src/diagnostics/MOM_harmonic_analysis.F90 | 88 +++++++++++++----------
 1 file changed, 51 insertions(+), 37 deletions(-)

diff --git a/src/diagnostics/MOM_harmonic_analysis.F90 b/src/diagnostics/MOM_harmonic_analysis.F90
index 1e3b9895cb..bdb493713a 100644
--- a/src/diagnostics/MOM_harmonic_analysis.F90
+++ b/src/diagnostics/MOM_harmonic_analysis.F90
@@ -173,6 +173,7 @@ end subroutine HA_register
 
 !> This subroutine accumulates the temporal basis functions in FtF.
 !! The tidal constituents are those used in MOM_tidal_forcing, plus the mean (of zero frequency).
+!! Only the main diagonal and entries below it are calculated, which are needed for Cholesky decomposition.
 subroutine HA_accum_FtF(Time, CS)
   type(time_type),            intent(in)    :: Time    !< The current model time
   type(harmonic_analysis_CS), intent(inout) :: CS      !< Control structure of the MOM_harmonic_analysis module
@@ -191,27 +192,31 @@ subroutine HA_accum_FtF(Time, CS)
   nc  = CS%nc
   now = CS%US%s_to_T * time_type_to_real(Time - CS%time_ref)
 
-  ! Accumulate FtF
-  CS%FtF(1,1) = CS%FtF(1,1) + 1.0         !< For the zero frequency
+  !< First entry, corresponding to the zero frequency constituent (mean)
+  CS%FtF(1,1) = CS%FtF(1,1) + 1.0
+
   do c=1,nc
     icos = 2*c
     isin = 2*c+1
     cosomegat = cos(CS%freq(c) * now + CS%phase0(c))
     sinomegat = sin(CS%freq(c) * now + CS%phase0(c))
+
+    ! First column, corresponding to the zero frequency constituent (mean)
     CS%FtF(icos,1) = CS%FtF(icos,1) + cosomegat
     CS%FtF(isin,1) = CS%FtF(isin,1) + sinomegat
-    CS%FtF(1,icos) = CS%FtF(icos,1)
-    CS%FtF(1,isin) = CS%FtF(isin,1)
-    do cc=c,nc
+
+    do cc=1,c
       iccos = 2*cc
       issin = 2*cc+1
       ccosomegat = cos(CS%freq(cc) * now + CS%phase0(cc))
       ssinomegat = sin(CS%freq(cc) * now + CS%phase0(cc))
+
+      ! Interior of the matrix, corresponding to the products of cosine and sine terms
       CS%FtF(icos,iccos) = CS%FtF(icos,iccos) + cosomegat * ccosomegat
       CS%FtF(icos,issin) = CS%FtF(icos,issin) + cosomegat * ssinomegat
       CS%FtF(isin,iccos) = CS%FtF(isin,iccos) + sinomegat * ccosomegat
       CS%FtF(isin,issin) = CS%FtF(isin,issin) + sinomegat * ssinomegat
-    enddo ! cc=c,nc
+    enddo ! cc=1,c
   enddo ! c=1,nc
 
 end subroutine HA_accum_FtF
@@ -276,14 +281,18 @@ subroutine HA_accum_FtSSH(key, data, Time, G, CS)
 
   is = ha1%is ; ie = ha1%ie ; js = ha1%js ; je = ha1%je
 
-  ! Accumulate FtF and FtSSH
+  !< First entry, corresponding to the zero frequency constituent (mean)
+  do j=js,je ; do i=is,ie
+    ha1%FtSSH(i,j,1) = ha1%FtSSH(i,j,1) + (data(i,j) - ha1%ref(i,j))
+  enddo ; enddo
+
+  !< The remaining entries
   do c=1,nc
     icos = 2*c
     isin = 2*c+1
     cosomegat = cos(CS%freq(c) * now + CS%phase0(c))
     sinomegat = sin(CS%freq(c) * now + CS%phase0(c))
     do j=js,je ; do i=is,ie
-      ha1%FtSSH(i,j,1)    = ha1%FtSSH(i,j,1)    + (data(i,j) - ha1%ref(i,j))
       ha1%FtSSH(i,j,icos) = ha1%FtSSH(i,j,icos) + (data(i,j) - ha1%ref(i,j)) * cosomegat
       ha1%FtSSH(i,j,isin) = ha1%FtSSH(i,j,isin) + (data(i,j) - ha1%ref(i,j)) * sinomegat
     enddo ; enddo
@@ -362,74 +371,79 @@ subroutine HA_write(ha1, Time, G, CS)
 
 end subroutine HA_write
 
-!> This subroutine computes the harmonic constants (stored in FtSSHw) using the dot products of the temporal
+!> This subroutine computes the harmonic constants (stored in x) using the dot products of the temporal
 !! basis functions accumulated in FtF, and the dot products of the SSH (or other fields) with the temporal basis
 !! functions accumulated in FtSSH. The system is solved by Cholesky decomposition,
 !!
-!!     FtF * FtSSHw = FtSSH,    =>    FtFw * (FtFw' * FtSSHw) = FtSSH,
+!!     FtF * x = FtSSH,    =>    L * (L' * x) = FtSSH,    =>    L * y = FtSSH,
 !!
-!! where FtFw is a lower triangular matrix, and the prime denotes matrix transpose.
+!! where L is the lower triangular matrix, y = L' * x, and x is the solution vector.
 !!
-subroutine HA_solver(ha1, nc, FtF, FtSSHw)
+subroutine HA_solver(ha1, nc, FtF, x)
   type(HA_type), pointer,              intent(in)  :: ha1    !< Control structure for the current field
   integer,                             intent(in)  :: nc     !< Number of harmonic constituents
   real, dimension(:,:),                intent(in)  :: FtF    !< Accumulator of (F' * F) for all fields [nondim]
-  real, dimension(:,:,:), allocatable, intent(out) :: FtSSHw !< Work array for Cholesky decomposition [A]
+  real, dimension(:,:,:), allocatable, intent(out) :: x      !< Solution vector of harmonic constants [A]
 
   ! Local variables
-  real :: tmp0                              !< Temporary variable for Cholesky decomposition [nondim]
-  real, dimension(:),   allocatable :: tmp1 !< Temporary variable for Cholesky decomposition [nondim]
-  real, dimension(:,:), allocatable :: tmp2 !< Temporary variable for Cholesky decomposition [A]
-  real, dimension(:,:), allocatable :: FtFw !< Lower triangular matrix for Cholesky decomposition [nondim]
+  real :: tmp0                                !< Temporary variable for Cholesky decomposition [nondim]
+  real, dimension(:,:),   allocatable :: L    !< Lower triangular matrix of Cholesky decomposition [nondim]
+  real, dimension(:),     allocatable :: tmp1 !< Inverse of the diagonal entries of L [nondim]
+  real, dimension(:,:),   allocatable :: tmp2 !< 2D temporary array involving FtSSH [A]
+  real, dimension(:,:,:), allocatable :: y    !< 3D temporary array, i.e., L' * x [A]
   integer :: k, m, n, is, ie, js, je
 
   is = ha1%is ; ie = ha1%ie ; js = ha1%js ; je = ha1%je
 
+  allocate(L(1:2*nc+1,1:2*nc+1), source=0.0)
   allocate(tmp1(1:2*nc+1), source=0.0)
   allocate(tmp2(is:ie,js:je), source=0.0)
-  allocate(FtFw(1:2*nc+1,1:2*nc+1), source=0.0)
-  allocate(FtSSHw(is:ie,js:je,2*nc+1), source=0.0)
+  allocate(x(is:ie,js:je,2*nc+1), source=0.0)
+  allocate(y(is:ie,js:je,2*nc+1), source=0.0)
 
-  ! Construct FtFw
-  FtFw(:,:) = 0.0
+  ! Cholesky decomposition
   do m=1,2*nc+1
+
+    ! First, calculate the diagonal entries
     tmp0 = 0.0
-    do k=1,m-1
-      tmp0 = tmp0 + FtFw(m,k) * FtFw(m,k)
+    do k=1,m-1                             ! This loop operates along the m-th row
+      tmp0 = tmp0 + L(m,k) * L(m,k)
     enddo
-    FtFw(m,m) = sqrt(FtF(m,m) - tmp0)
-    tmp1(m) = 1 / FtFw(m,m)
-    do k=m+1,2*nc+1
+    L(m,m) = sqrt(FtF(m,m) - tmp0)         ! This is the m-th diagonal entry
+
+    ! Now calculate the off-diagonal entries
+    tmp1(m) = 1 / L(m,m)
+    do k=m+1,2*nc+1                        ! This loop operates along the column below the m-th diagonal entry
       tmp0 = 0.0
       do n=1,m-1
-        tmp0 = tmp0 + FtFw(k,n) * FtFw(m,n)
+        tmp0 = tmp0 + L(k,n) * L(m,n)
       enddo
-      FtFw(k,m) = (FtF(k,m) - tmp0) * tmp1(m)
+      L(k,m) = (FtF(k,m) - tmp0) * tmp1(m) ! This is the k-th off-diagonal entry below the m-th diagonal entry
     enddo
   enddo
 
-  ! Solve for (FtFw' * FtSSHw)
-  FtSSHw(:,:,:) = ha1%FtSSH(:,:,:)
+  ! Solve for y from L * y = FtSSH
   do k=1,2*nc+1
     tmp2(:,:) = 0.0
     do m=1,k-1
-      tmp2(:,:) = tmp2(:,:) + FtFw(k,m) * FtSSHw(:,:,m)
+      tmp2(:,:) = tmp2(:,:) + L(k,m) * y(:,:,m)
     enddo
-    FtSSHw(:,:,k) = (FtSSHw(:,:,k) - tmp2(:,:)) * tmp1(k)
+    y(:,:,k) = (ha1%FtSSH(:,:,k) - tmp2(:,:)) * tmp1(k)
   enddo
 
-  ! Solve for FtSSHw
+  ! Solve for x from L' * x = y
   do k=2*nc+1,1,-1
     tmp2(:,:) = 0.0
     do m=k+1,2*nc+1
-      tmp2(:,:) = tmp2(:,:) + FtSSHw(:,:,m) * FtFw(m,k)
+      tmp2(:,:) = tmp2(:,:) + L(m,k) * x(:,:,m)
     enddo
-    FtSSHw(:,:,k) = (FtSSHw(:,:,k) - tmp2(:,:)) * tmp1(k)
+    x(:,:,k) = (y(:,:,k) - tmp2(:,:)) * tmp1(k)
   enddo
 
   deallocate(tmp1)
   deallocate(tmp2)
-  deallocate(FtFw)
+  deallocate(L)
+  deallocate(y)
 
 end subroutine HA_solver
 
@@ -441,7 +455,7 @@ end subroutine HA_solver
 !! step, and x is a 2*nc-by-1 vector containing the constant coefficients of the sine/cosine for each constituent
 !! (i.e., the harmonic constants). At each grid point, the harmonic constants are computed using least squares,
 !!
-!!     (F' * F) * x = F' * SSH_in,
+!!     (F' * F) * x = F' * SSH_in,    =>    FtF * x = FtSSH,
 !!
 !! where the prime denotes matrix transpose, and SSH_in is the sea surface height (or other fields) determined by
 !! the model. The dot products (F' * F) and (F' * SSH_in) are computed by accumulating the sums as the model is

From b101d1c46e1ecec97f8280b9aca2c7c96470321e Mon Sep 17 00:00:00 2001
From: William Xu <chengzhu.xu@oregonstate.edu>
Date: Wed, 30 Oct 2024 12:39:30 -0300
Subject: [PATCH 2/3] Inline harmonic analysis

Another bug fix: initial state added back to the mean state.
---
 src/diagnostics/MOM_harmonic_analysis.F90 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/diagnostics/MOM_harmonic_analysis.F90 b/src/diagnostics/MOM_harmonic_analysis.F90
index bdb493713a..abeb69eaaa 100644
--- a/src/diagnostics/MOM_harmonic_analysis.F90
+++ b/src/diagnostics/MOM_harmonic_analysis.F90
@@ -324,7 +324,7 @@ subroutine HA_write(ha1, Time, G, CS)
   ! Local variables
   real, dimension(:,:,:), allocatable :: FtSSHw    !< An array containing the harmonic constants [A]
   integer :: year, month, day, hour, minute, second
-  integer :: nc, k, is, ie, js, je
+  integer :: nc, i, j, k, is, ie, js, je
 
   character(len=255)           :: filename         !< Output file name
   type(MOM_infra_file)         :: cdf              !< The file handle for output harmonic constants
@@ -357,6 +357,11 @@ subroutine HA_write(ha1, Time, G, CS)
   call create_MOM_file(cdf, trim(filename), cdf_vars, &
                        2*nc+1, cdf_fields, SINGLE_FILE, 86400.0, G=G)
 
+  ! Add the initial field back to the mean state
+  do j=js,je ; do i=is,ie
+    FtSSHw(i,j,1) = FtSSHw(i,j,1) + ha1%ref(i,j)
+  enddo ; enddo
+
   ! Write data
   call MOM_write_field(cdf, cdf_fields(1), G%domain, FtSSHw(:,:,1), 0.0)
   do k=1,nc

From 1c32e40dad8060cb1db56877d66a82556c8713d0 Mon Sep 17 00:00:00 2001
From: William Xu <chengzhu.xu@oregonstate.edu>
Date: Wed, 27 Nov 2024 08:49:39 -0400
Subject: [PATCH 3/3] Inline harmonic analysis

Minor update to HA_solver
---
 src/diagnostics/MOM_harmonic_analysis.F90 | 26 ++++++-----------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/diagnostics/MOM_harmonic_analysis.F90 b/src/diagnostics/MOM_harmonic_analysis.F90
index abeb69eaaa..0013a8ab83 100644
--- a/src/diagnostics/MOM_harmonic_analysis.F90
+++ b/src/diagnostics/MOM_harmonic_analysis.F90
@@ -388,23 +388,16 @@ subroutine HA_solver(ha1, nc, FtF, x)
   type(HA_type), pointer,              intent(in)  :: ha1    !< Control structure for the current field
   integer,                             intent(in)  :: nc     !< Number of harmonic constituents
   real, dimension(:,:),                intent(in)  :: FtF    !< Accumulator of (F' * F) for all fields [nondim]
-  real, dimension(:,:,:), allocatable, intent(out) :: x      !< Solution vector of harmonic constants [A]
+  real, dimension(ha1%is:ha1%ie,ha1%js:ha1%je,2*nc+1), &
+                                       intent(out) :: x      !< Solution vector of harmonic constants [A]
 
   ! Local variables
   real :: tmp0                                !< Temporary variable for Cholesky decomposition [nondim]
-  real, dimension(:,:),   allocatable :: L    !< Lower triangular matrix of Cholesky decomposition [nondim]
-  real, dimension(:),     allocatable :: tmp1 !< Inverse of the diagonal entries of L [nondim]
-  real, dimension(:,:),   allocatable :: tmp2 !< 2D temporary array involving FtSSH [A]
-  real, dimension(:,:,:), allocatable :: y    !< 3D temporary array, i.e., L' * x [A]
-  integer :: k, m, n, is, ie, js, je
-
-  is = ha1%is ; ie = ha1%ie ; js = ha1%js ; je = ha1%je
-
-  allocate(L(1:2*nc+1,1:2*nc+1), source=0.0)
-  allocate(tmp1(1:2*nc+1), source=0.0)
-  allocate(tmp2(is:ie,js:je), source=0.0)
-  allocate(x(is:ie,js:je,2*nc+1), source=0.0)
-  allocate(y(is:ie,js:je,2*nc+1), source=0.0)
+  real, dimension(2*nc+1,2*nc+1)      :: L    !< Lower triangular matrix of Cholesky decomposition [nondim]
+  real, dimension(2*nc+1)             :: tmp1 !< Inverse of the diagonal entries of L [nondim]
+  real, dimension(ha1%is:ha1%ie,ha1%js:ha1%je)        :: tmp2 !< 2D temporary array involving FtSSH [A]
+  real, dimension(ha1%is:ha1%ie,ha1%js:ha1%je,2*nc+1) :: y    !< 3D temporary array, i.e., L' * x [A]
+  integer :: k, m, n
 
   ! Cholesky decomposition
   do m=1,2*nc+1
@@ -445,11 +438,6 @@ subroutine HA_solver(ha1, nc, FtF, x)
     x(:,:,k) = (y(:,:,k) - tmp2(:,:)) * tmp1(k)
   enddo
 
-  deallocate(tmp1)
-  deallocate(tmp2)
-  deallocate(L)
-  deallocate(y)
-
 end subroutine HA_solver
 
 !> \namespace harmonic_analysis