From 56c9d4ecefc74ca5cabff309f52bcb492b28279b Mon Sep 17 00:00:00 2001 From: vcarlier <105044741+vcarlier@users.noreply.github.com> Date: Thu, 7 Sep 2023 15:39:40 +0200 Subject: [PATCH] Add kernels for various linear algebra operations (#310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kernels for: - inner product of two `StencilVector` objects belonging to the same space (still called `dot` for now); - `axpy` operation `y = a * x + y` of `StencilVectorSpace`, where `(x, y)` are `StencilVector` objects and `a` is scalar; - Matrix-vector product of `StencilMatrix` (called `dot`). --------- Co-authored-by: Yaman Güçlü Co-authored-by: tomcaruso --- psydac/api/tests/test_equation.py | 2 +- psydac/linalg/basic.py | 33 + psydac/linalg/block.py | 29 + psydac/linalg/kernels/axpy_kernels.py | 61 ++ psydac/linalg/kernels/inner_kernels.py | 100 +++ psydac/linalg/kernels/matvec_kernels.py | 210 ++++++ psydac/linalg/solvers.py | 260 +++---- psydac/linalg/stencil.py | 767 +++++++++++---------- psydac/linalg/tests/test_block.py | 81 ++- psydac/linalg/tests/test_stencil_matrix.py | 6 +- psydac/linalg/tests/test_stencil_vector.py | 35 +- psydac/polar/dense.py | 4 + 12 files changed, 1027 insertions(+), 561 deletions(-) create mode 100644 psydac/linalg/kernels/axpy_kernels.py create mode 100644 psydac/linalg/kernels/inner_kernels.py create mode 100644 psydac/linalg/kernels/matvec_kernels.py diff --git a/psydac/api/tests/test_equation.py b/psydac/api/tests/test_equation.py index 9fe171665..eebfc6ecd 100644 --- a/psydac/api/tests/test_equation.py +++ b/psydac/api/tests/test_equation.py @@ -58,4 +58,4 @@ def test_field_and_constant(backend): xh = equation_h.solve(c=c_value, f=fh) # Verify that solution is equal to c_value - assert np.allclose(xh.coeffs.toarray(), c_value, rtol=1e-10, atol=1e-16) + assert np.allclose(xh.coeffs.toarray(), c_value, rtol=1e-9, atol=1e-16) diff --git a/psydac/linalg/basic.py b/psydac/linalg/basic.py index 70e4281c1..6a545c601 100644 --- a/psydac/linalg/basic.py +++ b/psydac/linalg/basic.py @@ -52,6 +52,25 @@ def dot(self, a, b): """ + @abstractmethod + def axpy(self, a, x, y): + """ + Increment the vector y with the a-scaled vector x, i.e. y = a * x + y, + provided that x and y belong to the same vector space V (self). + The scalar value a may be real or complex, depending on the field of V. + + Parameters + ---------- + a : scalar + The scaling coefficient needed for the operation. + + x : Vector + The vector which is not modified by this function. + + y : Vector + The vector modified by this function (incremented by a * x). + """ + #=============================================================================== class Vector(ABC): """ @@ -76,6 +95,20 @@ def dot(self, other): assert self.space is other.space return self.space.dot(self, other) + def mul_iadd(self, a, x): + """ + Compute self += a * x, where x is another vector of the same space. + + Parameters + ---------- + a : scalar + Rescaling coefficient, which can be cast to the correct dtype. + + x : Vector + Vector belonging to the same space as self. + """ + self.space.axpy(a, x, self) + #------------------------------------- # Deferred methods #------------------------------------- diff --git a/psydac/linalg/block.py b/psydac/linalg/block.py index 702aa866e..fb2737d56 100644 --- a/psydac/linalg/block.py +++ b/psydac/linalg/block.py @@ -85,6 +85,35 @@ def zeros(self): """ return BlockVector(self, [Vi.zeros() for Vi in self._spaces]) + #... + def axpy(self, a, x, y): + """ + Increment the vector y with the a-scaled vector x, i.e. y = a * x + y, + provided that x and y belong to the same vector space V (self). + The scalar value a may be real or complex, depending on the field of V. + + Parameters + ---------- + a : scalar + The scaling coefficient needed for the operation. + + x : BlockVector + The vector which is not modified by this function. + + y : BlockVector + The vector modified by this function (incremented by a * x). + """ + + assert isinstance(x, BlockVector) + assert isinstance(y, BlockVector) + assert x.space is self + assert y.space is self + + for Vi, xi, yi in zip(self.spaces, x.blocks, y.blocks): + Vi.axpy(a, xi, yi) + + x._sync = x._sync and y._sync + #-------------------------------------- # Other properties/methods #-------------------------------------- diff --git a/psydac/linalg/kernels/axpy_kernels.py b/psydac/linalg/kernels/axpy_kernels.py new file mode 100644 index 000000000..9761aa9a1 --- /dev/null +++ b/psydac/linalg/kernels/axpy_kernels.py @@ -0,0 +1,61 @@ +from pyccel.decorators import template + +#======================================================================================================== +@template(name='Tarray', types=['float[:]', 'complex[:]']) +@template(name='T', types=['float', 'complex']) +def axpy_1d(alpha: 'T', x: "Tarray", y: "Tarray"): + """ + Kernel for computing y = alpha * x + y. + + Parameters + ---------- + alpha : float | complex + Scaling coefficient. + + x, y : 1D Numpy arrays of (float | complex) data + Data of the vectors. + """ + n1, = x.shape + for i1 in range(n1): + y[i1] += alpha * x[i1] + +#======================================================================================================== +@template(name='Tarray', types=['float[:,:]', 'complex[:,:]']) +@template(name='T', types=['float', 'complex']) +def axpy_2d(alpha: 'T', x: "Tarray", y: "Tarray"): + """ + Kernel for computing y = alpha * x + y. + + Parameters + ---------- + alpha : float | complex + Scaling coefficient. + + x, y : 2D Numpy arrays of (float | complex) data + Data of the vectors. + """ + n1, n2 = x.shape + for i1 in range(n1): + for i2 in range(n2): + y[i1, i2] += alpha * x[i1, i2] + +#======================================================================================================== +@template(name='Tarray', types=['float[:,:,:]', 'complex[:,:,:]']) +@template(name='T', types=['float', 'complex']) +def axpy_3d(alpha: 'T', x: "Tarray", y: "Tarray"): + """ + Kernel for computing y = alpha * x + y. + + Parameters + ---------- + alpha : float | complex + Scaling coefficient. + + x, y : 3D Numpy arrays of (float | complex) data + Data of the vectors. + """ + n1, n2, n3 = x.shape + for i1 in range(n1): + for i2 in range(n2): + for i3 in range(n3): + y[i1, i2, i3] += alpha * x[i1, i2, i3] diff --git a/psydac/linalg/kernels/inner_kernels.py b/psydac/linalg/kernels/inner_kernels.py new file mode 100644 index 000000000..efb5a6ccb --- /dev/null +++ b/psydac/linalg/kernels/inner_kernels.py @@ -0,0 +1,100 @@ +from pyccel.decorators import template + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!!!!!# +#!!!!!!! Conjugate on the first argument !!!!!!!# +#!!!!!!!!!! This will need an update !!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# + +#============================================================================== +@template(name='T', types=['float[:]', 'complex[:]']) +def inner_1d(v1: 'T', v2: 'T', nghost0: 'int64'): + """ + Kernel for computing the inner product (case of two 1D vectors). + + Parameters + ---------- + v1, v2 : 1D NumPy array + Data of the vectors from which we are computing the inner product. + + nghost0 : int + Number of ghost cells of the arrays along the index 0. + + Returns + ------- + res : scalar + Scalar (real or complex) containing the result of the inner product. + """ + shape0, = v1.shape + + res = v1[0] - v1[0] + for i0 in range(nghost0, shape0 - nghost0): + res += v1[i0].conjugate() * v2[i0] + + return res + +#============================================================================== +@template(name='T', types=['float[:,:]', 'complex[:,:]']) +def inner_2d(v1: 'T', v2: 'T', nghost0: 'int64', nghost1: 'int64'): + """ + Kernel for computing the inner product (case of two 2D vectors). + + Parameters + ---------- + v1, v2 : 2D NumPy array + Data of the vectors from which we are computing the inner product. + + nghost0 : int + Number of ghost cells of the arrays along the index 0. + + nghost1 : int + Number of ghost cells of the arrays along the index 1. + + Returns + ------- + res : scalar + Scalar (real or complex) containing the result of the inner product. + """ + shape0, shape1 = v1.shape + + res = v1[0, 0] - v1[0, 0] + for i0 in range(nghost0, shape0 - nghost0): + for i1 in range(nghost1, shape1 - nghost1): + res += v1[i0, i1].conjugate() * v2[i0, i1] + + return res + +#============================================================================== +@template(name='T', types=['float[:,:,:]', 'complex[:,:,:]']) +def inner_3d(v1: 'T', v2: 'T', nghost0: 'int64', nghost1: 'int64', nghost2: 'int64'): + """ + Kernel for computing the inner product (case of two 3D vectors). + + Parameters + ---------- + v1, v2 : 3D NumPy array + Data of the vectors from which we are computing the inner product. + + nghost0 : int + Number of ghost cells of the arrays along the index 0. + + nghost1 : int + Number of ghost cells of the arrays along the index 1. + + nghost2 : int + Number of ghost cells of the arrays along the index 2. + + Returns + ------- + res : scalar + Scalar (real or complex) containing the result of the inner product. + """ + shape0, shape1, shape2 = v1.shape + + res = v1[0, 0, 0] - v1[0, 0, 0] + for i0 in range(nghost0, shape0 - nghost0): + for i1 in range(nghost1, shape1 - nghost1): + for i2 in range(nghost2, shape2 - nghost2): + res += v1[i0, i1, i2].conjugate() * v2[i0, i1, i2] + + return res diff --git a/psydac/linalg/kernels/matvec_kernels.py b/psydac/linalg/kernels/matvec_kernels.py new file mode 100644 index 000000000..758ce0b8d --- /dev/null +++ b/psydac/linalg/kernels/matvec_kernels.py @@ -0,0 +1,210 @@ +from pyccel.decorators import template + + +@template(name='T', types=['float[:]', 'complex[:]']) +@template(name='Tarray', types=['float[:,:]', 'complex[:,:]']) +def matvec_1d(mat00:'Tarray', x0:'T', out0:'T', starts: 'int64[:]', nrows: 'int64[:]', nrows_extra: 'int64[:]', + dm:'int64[:]', cm:'int64[:]', pad_imp:'int64[:]', ndiags:'int64[:]', gpads: 'int64[:]'): + + nrows1 = nrows[0] + dstart1 = starts[0] + dshift1 = dm[0] + cshift1 = cm[0] + ndiags1 = ndiags[0] + dpads1 = gpads[0] + pad_imp1 = pad_imp[0] + + pxm1 = dpads1 * cshift1 + + start_impact1 = dstart1 % dshift1 + + v00 = mat00[0, 0] - mat00[0, 0] + x0[0] - x0[0] + + for i1 in range(nrows1): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + for k1 in range(ndiags1): + v00 += mat00[pxm1 + i1, k1] * x0[k1 + x_min1] + out0[pxm1 + i1] = v00 + + if 0 < nrows_extra[0]: + pxm1 += nrows1 + start_impact1 += nrows1 + for i1 in range(nrows_extra[0]): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + for k1 in range(ndiags1 - i1 - 1): + v00 += mat00[pxm1 + i1, k1] * x0[x_min1 + k1] + out0[pxm1 + i1] = v00 + + +@template(name='T', types=['float[:,:]', 'complex[:,:]']) +@template(name='Tarray', types=['float[:,:,:,:]', 'complex[:,:,:,:]']) +def matvec_2d(mat00:'Tarray', x0:'T', out0:'T', starts:'int64[:]', nrows:'int64[:]', nrows_extra:'int64[:]', + dm:'int64[:]', cm:'int64[:]', pad_imp:'int64[:]', ndiags:'int64[:]', gpads: 'int64[:]'): + + nrows1 = nrows[0] + nrows2 = nrows[1] + dstart1 = starts[0] + dstart2 = starts[1] + dshift1 = dm[0] + dshift2 = dm[1] + cshift1 = cm[0] + cshift2 = cm[1] + ndiags1 = ndiags[0] + ndiags2 = ndiags[1] + dpads1 = gpads[0] + dpads2 = gpads[1] + pad_imp1 = pad_imp[0] + pad_imp2 = pad_imp[1] + + pxm1 = dpads1 * cshift1 + pxm2 = dpads2 * cshift2 + + start_impact1 = dstart1 % dshift1 + start_impact2 = dstart2 % dshift2 + + v00 = mat00[0, 0, 0, 0] - mat00[0, 0, 0, 0] + x0[0, 0] - x0[0, 0] + + for i1 in range(nrows1): + for i2 in range(nrows2): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + for k1 in range(ndiags1): + for k2 in range(ndiags2): + v00 += mat00[pxm1 + i1, pxm2 + i2, k1, k2] * x0[k1 + x_min1, k2 + x_min2] + out0[pxm1 + i1, pxm2 + i2] = v00 + + if 0 < nrows_extra[0]: + pxm1 += nrows1 + start_impact1 += nrows1 + for i1 in range(nrows_extra[0]): + for i2 in range(nrows2): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + for k1 in range(ndiags1 - i1 - 1): + for k2 in range(ndiags2): + v00 += mat00[pxm1 + i1, pxm2 + i2, k1, k2] * x0[x_min1 + k1, x_min2 + k2] + out0[pxm1 + i1, pxm2 + i2] = v00 + + if 0 < nrows_extra[1]: + pxm1 = dpads1 * cshift1 + start_impact1 = dstart1 % dshift1 + pxm2 += nrows2 + start_impact2 += nrows2 + for i1 in range(nrows1 + nrows_extra[0]): + for i2 in range(nrows_extra[1]): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + for k1 in range(ndiags1 - max(0, i1 + 1 - nrows1)): + for k2 in range(ndiags2 - i2 - 1): + v00 += mat00[pxm1 + i1, pxm2 + i2, k1, k2] * x0[x_min1 + k1, x_min2 + k2] + out0[pxm1 + i1, pxm2 + i2] = v00 + + +@template(name='T', types=['float[:,:,:]', 'complex[:,:,:]']) +@template(name='Tarray', types=['float[:,:,:,:,:,:]', 'complex[:,:,:,:,:,:]']) +def matvec_3d(mat00:'Tarray', x0:'T', out0:'T', starts:'int64[:]', nrows:'int64[:]', nrows_extra:'int64[:]', + dm:'int64[:]', cm:'int64[:]', pad_imp:'int64[:]', ndiags:'int64[:]', gpads: 'int64[:]'): + + nrows1 = nrows[0] + nrows2 = nrows[1] + nrows3 = nrows[2] + dstart1 = starts[0] + dstart2 = starts[1] + dstart3 = starts[2] + dshift1 = dm[0] + dshift2 = dm[1] + dshift3 = dm[2] + cshift1 = cm[0] + cshift2 = cm[1] + cshift3 = cm[2] + ndiags1 = ndiags[0] + ndiags2 = ndiags[1] + ndiags3 = ndiags[2] + dpads1 = gpads[0] + dpads2 = gpads[1] + dpads3 = gpads[2] + pad_imp1 = pad_imp[0] + pad_imp2 = pad_imp[1] + pad_imp3 = pad_imp[2] + + pxm1 = dpads1 * cshift1 + pxm2 = dpads2 * cshift2 + pxm3 = dpads3 * cshift3 + + start_impact1 = dstart1 % dshift1 + start_impact2 = dstart2 % dshift2 + start_impact3 = dstart3 % dshift3 + + v00 = mat00[0, 0, 0, 0, 0, 0] - mat00[0, 0, 0, 0, 0, 0] + x0[0, 0, 0] - x0[0, 0, 0] + + for i1 in range(nrows1): + for i2 in range(nrows2): + for i3 in range(nrows3): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + x_min3 = pad_imp3 + (i3 + start_impact3) // cshift3 * dshift3 + for k1 in range(ndiags1): + for k2 in range(ndiags2): + for k3 in range(ndiags3): + v00 += mat00[pxm1 + i1, pxm2 + i2, pxm3 + i3, k1, k2, k3] * x0[k1 + x_min1, k2 + x_min2, k3 + x_min3] + out0[pxm1 + i1, pxm2 + i2, pxm3 + i3] = v00 + + if 0 < nrows_extra[0]: + pxm1 += nrows1 + start_impact1 += nrows1 + for i1 in range(nrows_extra[0]): + for i2 in range(nrows2): + for i3 in range(nrows3): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + x_min3 = pad_imp3 + (i3 + start_impact3) // cshift3 * dshift3 + for k1 in range(ndiags1 - i1 - 1): + for k2 in range(ndiags2): + for k3 in range(ndiags3): + v00 += mat00[pxm1 + i1, pxm2 + i2, pxm3 + i3, k1, k2, k3] * x0[x_min1 + k1, x_min2 + k2, x_min3 + k3] + out0[pxm1 + i1, pxm2 + i2, pxm3 + i3] = v00 + + if 0 < nrows_extra[1]: + pxm1 = dpads1 * cshift1 + start_impact1 = dstart1 % dshift1 + pxm2 += nrows2 + start_impact2 += nrows2 + for i1 in range(nrows1 + nrows_extra[0]): + for i2 in range(nrows_extra[1]): + for i3 in range(nrows3): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + x_min3 = pad_imp3 + (i3 + start_impact3) // cshift3 * dshift3 + for k1 in range(ndiags1 - max(0, i1 + 1 - nrows1)): + for k2 in range(ndiags2 - i2 - 1): + for k3 in range(ndiags3): + v00 += mat00[pxm1 + i1, pxm2 + i2, pxm3 + i3, k1, k2, k3] * x0[x_min1 + k1, x_min2 + k2, x_min3 + k3] + out0[pxm1 + i1, pxm2 + i2, pxm3 + i3] = v00 + + if 0 < nrows_extra[2]: + pxm1 = dpads1 * cshift1 + pxm2 = dpads2 * cshift2 + start_impact1 = dstart1 % dshift1 + start_impact2 = dstart2 % dshift2 + pxm3 += nrows3 + start_impact3 += nrows3 + for i1 in range(nrows1 + nrows_extra[0]): + for i2 in range(nrows2 + nrows_extra[1]): + for i3 in range(nrows_extra[2]): + v00 *= 0 + x_min1 = pad_imp1 + (i1 + start_impact1) // cshift1 * dshift1 + x_min2 = pad_imp2 + (i2 + start_impact2) // cshift2 * dshift2 + x_min3 = pad_imp3 + (i3 + start_impact3) // cshift3 * dshift3 + for k1 in range(ndiags1 - max(0, i1 + 1 - nrows1)): + for k2 in range(ndiags2 - max(0, i2 + 1 - nrows2)): + for k3 in range(ndiags3 - i3 - 1): + v00 += mat00[pxm1 + i1, pxm2 + i2, pxm3 + i3, k1, k2, k3] * x0[x_min1 + k1, x_min2 + k2, x_min3 + k3] + out0[pxm1 + i1, pxm2 + i2, pxm3 + i3] = v00 diff --git a/psydac/linalg/solvers.py b/psydac/linalg/solvers.py index 58a7d6741..35ea01366 100644 --- a/psydac/linalg/solvers.py +++ b/psydac/linalg/solvers.py @@ -125,7 +125,7 @@ def __init__(self, A, *, x0=None, tol=1e-6, maxiter=1000, verbose=False): self._solver = 'cg' self._options = {"x0":x0, "tol":tol, "maxiter":maxiter, "verbose":verbose} self._check_options(**self._options) - self._tmps = {key: domain.zeros() for key in ("v", "r", "p", "lp", "lv")} + self._tmps = {key: domain.zeros() for key in ("v", "r", "p")} self._info = None def _check_options(self, **kwargs): @@ -205,9 +205,6 @@ def solve(self, b, out=None): v = self._tmps["v"] r = self._tmps["r"] p = self._tmps["p"] - # Not strictly needed by the conjugate gradient, but necessary to avoid temporaries - lp = self._tmps["lp"] - lv = self._tmps["lv"] # First values A.dot(x, out=v) @@ -233,12 +230,10 @@ def solve(self, b, out=None): break A.dot(p, out=v) l = am / v.dot(p) - p.copy(out=lp) - lp *= l - x += lp # this was x += l*p - v.copy(out=lv) - lv *= l - r -= lv # this was r -= l*v + + x.mul_iadd(l, p) # this is x += l*p + r.mul_iadd(-l, v) # this is r -= l*v + am1 = r.dot(r).real p *= (am1/am) p += r @@ -313,8 +308,8 @@ def __init__(self, A, *, pc='jacobi', x0=None, tol=1e-6, maxiter=1000, verbose=F self._solver = 'pcg' self._options = {"x0":x0, "pc":pc, "tol":tol, "maxiter":maxiter, "verbose":verbose} self._check_options(**self._options) - tmps_codomain = {key: codomain.zeros() for key in ("p", "s", "lp")} - tmps_domain = {key: domain.zeros() for key in ("v", "r", "lv")} + tmps_codomain = {key: codomain.zeros() for key in ("p", "s")} + tmps_domain = {key: domain.zeros() for key in ("v", "r")} self._tmps = {**tmps_codomain, **tmps_domain} self._info = None @@ -409,9 +404,6 @@ def solve(self, b, out=None): r = self._tmps["r"] p = self._tmps["p"] s = self._tmps["s"] - # Not strictly needed by the conjugate gradient, but necessary to avoid temporaries - lp = self._tmps["lp"] - lv = self._tmps["lv"] # First values A.dot(x, out=v) @@ -441,19 +433,19 @@ def solve(self, b, out=None): v = A.dot(p, out=v) l = am / v.dot(p) - p.copy(out=lp) - lp *= l - x += lp # this was x += l*p - v.copy(out=lv) - lv *= l - r -= lv # this was r -= l*v + + x.mul_iadd(l, p) # this is x += l*p + r.mul_iadd(-l, v) # this is r -= l*v nrmr_sqr = r.dot(r).real psolve(r, out=s) am1 = s.dot(r) - p *= (am1/am) - p += s + + # we are computing p = (am1 / am) * p + s by using axpy on s and exchanging the arrays + s.mul_iadd((am1/am), p) + s, p = p, s + am = am1 if verbose: @@ -640,8 +632,6 @@ def solve(self, b, out=None): #----------------------- A.dot(p, out=v) Ah.dot(ps, out=vs) - #v = A.dot(p , out=v) # overwriting v, then saving in v. Necessary? - #vs = At.dot(ps, out=vs) # same story #----------------------- # c := (rs, r) @@ -654,32 +644,29 @@ def solve(self, b, out=None): # SOLUTION UPDATE #----------------------- # x := x + a*p - p *= a - x += p + x.mul_iadd(a, p) #----------------------- # r := r - a*v - v *= a - r -= v + r.mul_iadd(-a, v) # rs := rs - conj(a)*vs - vs *= a.conj() - rs -= vs + rs.mul_iadd(-a.conjugate(), vs) + + # ||r||_2 := (r, r) + res_sqr = r.dot(r).real # b := (rs, r)_{m+1} / (rs, r)_m b = rs.dot(r) / c # p := r + b*p - p *= (b/a) # *= (b/a) why a? or update description + p *= b p += r # ps := rs + conj(b)*ps ps *= b.conj() ps += rs - # ||r||_2 := (r, r) - res_sqr = r.dot(r).real - if verbose: print( template.format(m, sqrt(res_sqr)) ) @@ -746,7 +733,7 @@ def __init__(self, A, *, x0=None, tol=1e-6, maxiter=1000, verbose=False): self._solver = 'bicgstab' self._options = {"x0": x0, "tol": tol, "maxiter": maxiter, "verbose": verbose} self._check_options(**self._options) - self._tmps = {key: domain.zeros() for key in ("v", "r", "p", "vs", "r0", "s")} + self._tmps = {key: domain.zeros() for key in ("v", "r", "p", "vr", "r0")} self._info = None def _check_options(self, **kwargs): @@ -834,9 +821,8 @@ def solve(self, b, out=None): v = self._tmps["v"] r = self._tmps["r"] p = self._tmps["p"] - vs = self._tmps["vs"] + vr = self._tmps["vr"] r0 = self._tmps["r0"] - s = self._tmps["s"] # First values A.dot(x, out=v) @@ -845,11 +831,9 @@ def solve(self, b, out=None): #r = b - A.dot(x) r.copy(out=p) v *= 0.0 - vs *= 0.0 + vr *= 0.0 r.copy(out=r0) - r.copy(out=s) - s *= 0.0 res_sqr = r.dot(r).real tol_sqr = tol ** 2 @@ -880,34 +864,25 @@ def solve(self, b, out=None): # a := (r0, r) / (r0, v) a = c / (r0.dot(v)) - # s := r - a*v - s *= 0 - v *= a - s += r - s -= v + # r := r - a*v + r.mul_iadd(-a, v) - # vs := A*s - vs = A.dot(s, out=vs) + # vr := A*r + vr = A.dot(r, out=vr) - # w := (s, A*s) / (A*s, A*s) - w = s.dot(vs) / vs.dot(vs) + # w := (r, A*r) / (A*r, A*r) + w = r.dot(vr) / vr.dot(vr) # ----------------------- # SOLUTION UPDATE # ----------------------- - # x := x + a*p +w*s - p *= a - s *= w - x += p - x += s + # x := x + a*p +w*r + x.mul_iadd(a, p) + x.mul_iadd(w, r) # ----------------------- - # r := s - w*vs - vs *= w - s *= 1 / w - r *= 0 - r += s - r -= vs + # r := r - w*A*r + r.mul_iadd(-w, vr) # ||r||_2 := (r, r) res_sqr = r.dot(r).real @@ -919,10 +894,9 @@ def solve(self, b, out=None): b = r0.dot(r) * a / (c * w) # p := r + b*p- b*w*v - v *= (b * w / a) - p *= (b / a) - p -= v + p *= b p += r + p.mul_iadd(-b * w, v) if verbose: print(template.format(m, sqrt(res_sqr))) @@ -1001,8 +975,7 @@ def __init__(self, A, *, x0=None, tol=1e-6, maxiter=1000, verbose=False): self._solver = 'minres' self._options = {"x0":x0, "tol":tol, "maxiter":maxiter, "verbose":verbose} self._check_options(**self._options) - self._tmps = {key: domain.zeros() for key in ("res1", "res2", "w", "w2", "yc", - "v", "resc", "res2c", "ycc", "res1c", "wc", "w2c")} + self._tmps = {key: domain.zeros() for key in ("res_old", "res_new", "w_new", "w_work", "w_old", "v", "y")} self._info = None def _check_options(self, **kwargs): @@ -1092,40 +1065,30 @@ def solve(self, b, out=None): # Extract local storage v = self._tmps["v"] - w = self._tmps["w"] - w2 = self._tmps["w2"] - res1 = self._tmps["res1"] - res2 = self._tmps["res2"] - # auxiliary to minimzize temps, optimal solution until proven wrong - wc = self._tmps["wc"] - w2c = self._tmps["w2c"] - yc = self._tmps["yc"] - ycc = self._tmps["ycc"] - resc = self._tmps["resc"] - res1c = self._tmps["res1c"] - res2c = self._tmps["res2c"] + y = self._tmps["y"] + w_new = self._tmps["w_new"] + w_work = self._tmps["w_work"] + w_old = self._tmps["w_old"] + res_old = self._tmps["res_old"] + res_new = self._tmps["res_new"] istop = 0 itn = 0 - Anorm = 0 - Acond = 0 rnorm = 0 - ynorm = 0 eps = np.finfo(b.dtype).eps - A.dot(x, out=res1) - res1 -= b - res1 *= -1.0 - y = res1 + A.dot(x, out=y) + y -= b + y *= -1.0 + y.copy(out=res_old) - beta = sqrt(res1.dot(res1)) + beta = sqrt(res_old.dot(res_old)) # Initialize other quantities oldb = 0 dbar = 0 epsln = 0 - qrnorm = beta phibar = beta rhs1 = beta rhs2 = 0 @@ -1134,11 +1097,10 @@ def solve(self, b, out=None): gmin = np.finfo(b.dtype).max cs = -1 sn = 0 - b.copy(out=w) - w *= 0.0 - b.copy(out=w2) - w2 *= 0.0 - res1.copy(out=res2) + w_new *= 0.0 + w_work *= 0.0 + w_old *= 0.0 + res_old.copy(out=res_new) if verbose: print( "MINRES solver:" ) @@ -1153,28 +1115,20 @@ def solve(self, b, out=None): s = 1.0/beta y.copy(out=v) v *= s - A.dot(v, out=yc) - y = yc + A.dot(v, out=y) if itn >= 2: - res1 *= (beta/oldb) - y -= res1 + y.mul_iadd(-(beta/oldb), res_old) alfa = v.dot(y) - res1 = res2 - - res2.copy(out=resc) - resc *= (alfa/beta) - y.copy(out=ycc) - ycc -= resc - y = ycc - res1.copy(out=res1c) - res1 = res1c - y.copy(out=res2c) - res2 = res2c + y.mul_iadd(-(alfa/beta), res_new) + + # We put res_new in res_old and y in res_new + res_new, res_old = res_old, res_new + y.copy(out=res_new) oldb = beta - beta = sqrt(y.dot(y)) + beta = sqrt(res_new.dot(res_new)) tnorm2 += alfa**2 + oldb**2 + beta**2 # Apply previous rotation Qk-1 to get @@ -1187,7 +1141,6 @@ def solve(self, b, out=None): epsln = sn * beta # epsln2 = 0 epslnk+1 dbar = - cs * beta # dbar 2 = beta2 dbar k+1 root = sqrt(gbar**2 + dbar**2) - Arnorm = phibar * root # Compute the next plane rotation Qk @@ -1199,24 +1152,17 @@ def solve(self, b, out=None): phibar = sn * phibar # phibark+1 # Update x. - denom = 1.0/gamma - w1 = w2 - w2 = w - - w1.copy(out=yc) - yc *= oldeps - w2.copy(out=w2c) - w2.copy(out=wc) - w = wc - w2 = w2c - w *= delta - w += yc - w -= v - w *= -denom - w.copy(out=yc) - yc *= phi - x += yc + + # We put w_old in w_work and w_new in w_old + w_work, w_old = w_old, w_work + w_new.copy(out=w_old) + + w_new *= delta + w_new.mul_iadd(oldeps, w_work) + w_new -= v + w_new *= -denom + x.mul_iadd(phi, w_new) # Go round again. @@ -1230,12 +1176,6 @@ def solve(self, b, out=None): Anorm = sqrt(tnorm2) ynorm = sqrt(x.dot(x)) - epsa = Anorm * eps - epsx = Anorm * ynorm * eps - epsr = Anorm * ynorm * tol - diag = gbar - - if diag == 0:diag = epsa rnorm = phibar if ynorm == 0 or Anorm == 0:test1 = inf @@ -1360,8 +1300,8 @@ def __init__(self, A, *, x0=None, tol=None, atol=None, btol=None, maxiter=1000, self._check_options(**self._options) self._info = None self._successful = None - tmps_domain = {key: domain.zeros() for key in ("uh", "uc")} - tmps_codomain = {key: codomain.zeros() for key in ("v", "vh", "h", "hbar")} + tmps_domain = {key: domain.zeros() for key in ("u", "u_work")} + tmps_codomain = {key: codomain.zeros() for key in ("v", "v_work", "h", "hbar")} self._tmps = {**tmps_codomain, **tmps_domain} def get_success(self): @@ -1463,13 +1403,13 @@ def solve(self, b, out=None): x = x0.copy(out=out) # Extract local storage + u = self._tmps["u"] v = self._tmps["v"] h = self._tmps["h"] hbar = self._tmps["hbar"] # Not strictly needed by the LSMR, but necessary to avoid temporaries - uh = self._tmps["uh"] - vh = self._tmps["vh"] - uc = self._tmps["uc"] + u_work = self._tmps["u_work"] + v_work = self._tmps["v_work"] if atol is None:atol = 1e-6 if btol is None:btol = 1e-6 @@ -1477,17 +1417,15 @@ def solve(self, b, out=None): atol = tol btol = tol - u = b + b.copy(out=u) normb = sqrt(b.dot(b).real) - A.dot(x, out=uh) - u -= uh + A.dot(x, out=u_work) + u -= u_work beta = sqrt(u.dot(u).real) if beta > 0: - u.copy(out = uc) - uc *= (1 / beta) - u = uc + u *= (1 / beta) At.dot(u, out=v) alpha = sqrt(v.dot(v).real) else: @@ -1525,9 +1463,6 @@ def solve(self, b, out=None): normA2 = alpha * alpha maxrbar = 0 minrbar = 1e+100 - normA = sqrt(normA2) - condA = 1 - normx = 0 # Items for use in stopping rules, normb set earlier istop = 0 @@ -1536,7 +1471,6 @@ def solve(self, b, out=None): normr = beta # Reverse the order here from the original matlab code because - normar = alpha * beta if verbose: print( "LSMR solver:" ) @@ -1554,15 +1488,15 @@ def solve(self, b, out=None): # alpha*v = A'*u - beta*v. u *= -alpha - A.dot(v, out=uh) - u += uh + A.dot(v, out=u_work) + u += u_work beta = sqrt(u.dot(u).real) if beta > 0: u *= (1 / beta) v *= -beta - At.dot(u, out=vh) - v += vh + At.dot(u, out=v_work) + v += v_work alpha = sqrt(v.dot(v).real) if alpha > 0:v *= (1 / alpha) @@ -1594,9 +1528,7 @@ def solve(self, b, out=None): hbar *= - (thetabar * rho / (rhoold * rhobarold)) hbar += h - hbar.copy(out=uh) - uh *= (zeta / (rho * rhobar)) - x += uh + x.mul_iadd((zeta / (rho * rhobar)), hbar) h *= - (thetanew / rho) h += v @@ -1743,7 +1675,7 @@ def __init__(self, A, *, x0=None, tol=1e-6, maxiter=100, verbose=False): self._solver = 'gmres' self._options = {"x0":x0, "tol":tol, "maxiter":maxiter, "verbose":verbose} self._check_options(**self._options) - self._tmps = {key: domain.zeros() for key in ("r", "p", "v", "lv")} + self._tmps = {key: domain.zeros() for key in ("r", "p")} # Initialize upper Hessenberg matrix self._H = np.zeros((self._options["maxiter"] + 1, self._options["maxiter"]), dtype=A.dtype) @@ -1824,7 +1756,7 @@ def solve(self, b, out=None): # Extract local storage r = self._tmps["r"] - v = self._tmps["v"] + p = self._tmps["p"] # Internal objects of GMRES self._H[:,:] = 0. @@ -1859,7 +1791,7 @@ def solve(self, b, out=None): break # run Arnoldi - self.arnoldi(k) + self.arnoldi(k, p) # make the last diagonal entry in H equal to 0, so that H becomes upper triangular self.apply_givens_rotation(k, sn, cn) @@ -1878,9 +1810,7 @@ def solve(self, b, out=None): y = self.solve_triangular(self._H[:k, :k], beta[:k]) # system of upper triangular matrix for i in range(k): - self._Q[i].copy(out=v) - v *= y[i] - x += v + x.mul_iadd(y[i], self._Q[i]) # Convergence information self._info = {'niter': k+1, 'success': am < tol, 'res_norm': am } @@ -1900,19 +1830,13 @@ def solve_triangular(self, T, d): return y - def arnoldi(self, k): + def arnoldi(self, k, p): h = self._H[:k+2, k] - - p = self._tmps["p"] self._A.dot( self._Q[k] , out=p) # Krylov vector - lv = self._tmps["lv"] - for i in range(k + 1): # Modified Gram-Schmidt, keeping Hessenberg matrix h[i] = p.dot(self._Q[i]) - self._Q[i].copy(out=lv) - lv *= h[i] - p -= lv + p.mul_iadd(-h[i], self._Q[i]) h[k+1] = sqrt(p.dot(p).real) p /= h[k+1] # Normalize vector diff --git a/psydac/linalg/stencil.py b/psydac/linalg/stencil.py index c2c55d3d6..a083e6db0 100644 --- a/psydac/linalg/stencil.py +++ b/psydac/linalg/stencil.py @@ -11,56 +11,83 @@ from scipy.sparse import coo_matrix from mpi4py import MPI -from psydac.linalg.basic import VectorSpace, Vector, LinearOperator -from psydac.ddm.cart import find_mpi_type, CartDecomposition, InterfaceCartDecomposition -from psydac.ddm.utilities import get_data_exchanger -from psydac.linalg.kernels.transpose_kernels import transpose_1d, transpose_2d, transpose_3d, interface_transpose_1d, interface_transpose_2d, interface_transpose_3d -from psydac.linalg.kernels.stencil2coo_kernels import stencil2coo_1d_F, stencil2coo_1d_C,stencil2coo_2d_C, stencil2coo_2d_F, stencil2coo_3d_C,stencil2coo_3d_F -__all__ = ('StencilVectorSpace','StencilVector','StencilMatrix', 'StencilInterfaceMatrix') +from psydac.linalg.basic import VectorSpace, Vector, LinearOperator +from psydac.ddm.cart import find_mpi_type, CartDecomposition, InterfaceCartDecomposition +from psydac.ddm.utilities import get_data_exchanger +from psydac.api.settings import PSYDAC_BACKENDS + +from .kernels.axpy_kernels import axpy_1d, axpy_2d, axpy_3d +from .kernels.inner_kernels import inner_1d, inner_2d, inner_3d +from .kernels.matvec_kernels import matvec_1d, matvec_2d, matvec_3d +from .kernels.transpose_kernels import transpose_1d, transpose_2d, transpose_3d +from .kernels.transpose_kernels import interface_transpose_1d, interface_transpose_2d, interface_transpose_3d +from .kernels.stencil2coo_kernels import stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F +from .kernels.stencil2coo_kernels import stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C + + +__all__ = ( + 'StencilVectorSpace', + 'StencilVector', + 'StencilMatrix', + 'StencilInterfaceMatrix' +) + +#=============================================================================== +# Dictionary used to select correct kernel functions based on dimensionality +kernels = { + 'axpy' : (None, axpy_1d, axpy_2d, axpy_3d), + 'inner' : (None, inner_1d, inner_2d, inner_3d), + 'matvec': (None, matvec_1d, matvec_2d, matvec_3d), + 'transpose': (None, transpose_1d, transpose_2d, transpose_3d), + 'interface_transpose': (None, interface_transpose_1d, interface_transpose_2d, interface_transpose_3d), + 'stencil2coo': {'F': (None, stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F), + 'C': (None, stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C)} +} #=============================================================================== def compute_diag_len(pads, shifts_domain, shifts_codomain, return_padding=False): - """ Compute the diagonal length and the padding of the stencil matrix for each direction, - using the shifts of the domain and the codomain. + """ + Compute the diagonal length and the padding of the stencil matrix for each direction, + using the shifts of the domain and the codomain. - Parameters - ---------- - pads : tuple-like (int) - Padding along each direction + Parameters + ---------- + pads : tuple-like (int) + Padding along each direction. - shifts_domain : tuple_like (int) - Shifts of the domain along each direction + shifts_domain : tuple_like (int) + Shifts of the domain along each direction. - shifts_codomain : tuple_like (int) - Shifts of the codomain along each direction + shifts_codomain : tuple_like (int) + Shifts of the codomain along each direction. - return_padding : bool - Return the new padding if True - - Returns - ------- - n : (int) - Diagonal length of the stencil matrix + return_padding : bool + Return the new padding if True. + + Returns + ------- + n : (int) + Diagonal length of the stencil matrix. - ep : (int) - Padding that constitutes the starting index of the non zero elements + ep : (int) + Padding that constitutes the starting index of the non zero elements. """ - n = ((np.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int') + n = ((np.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int') ep = -np.minimum(0, n-pads) - n = n+ep + pads+1 + n = n + ep + pads + 1 if return_padding: - return n.astype('int'), (ep).astype('int') + return n.astype('int'), ep.astype('int') else: return n.astype('int') #=============================================================================== -class StencilVectorSpace( VectorSpace ): +class StencilVectorSpace(VectorSpace): """ Vector space for n-dimensional stencil format. Two different initializations are possible: - - serial : StencilVectorSpace( npts, pads, periods, shifts=None, starts=None, ends=None, dtype=float ) - - parallel: StencilVectorSpace( cart, dtype=float ) + - serial : StencilVectorSpace(npts, pads, periods, shifts=None, starts=None, ends=None, dtype=float) + - parallel: StencilVectorSpace(cart, dtype=float) Parameters ---------- @@ -91,21 +118,21 @@ class StencilVectorSpace( VectorSpace ): """ - def __init__( self, cart, dtype=float ): + def __init__(self, cart, dtype=float): - assert isinstance( cart, (CartDecomposition, InterfaceCartDecomposition) ) + assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition)) # Sequential attributes - self._parallel = cart.is_parallel - self._cart = cart - self._ndim = cart._ndims - self._npts = cart.npts - self._pads = cart.pads - self._periods = cart.periods - self._shifts = cart.shifts - self._dtype = dtype - self._starts = cart.starts - self._ends = cart.ends + self._parallel = cart.is_parallel + self._cart = cart + self._ndim = cart._ndims + self._npts = cart.npts + self._pads = cart.pads + self._periods = cart.periods + self._shifts = cart.shifts + self._dtype = dtype + self._starts = cart.starts + self._ends = cart.ends # The shape of the allocated numpy array self._shape = cart.shape @@ -115,36 +142,67 @@ def __init__( self, cart, dtype=float ): # The dictionary follows the structure {(axis, ext): StencilVectorSpace()} # where axis and ext represent the boundary shared by two patches - self._interfaces = {} + self._interfaces = {} self._interfaces_readonly = MappingProxyType(self._interfaces) - # Parallel attributes if cart.is_parallel and not cart.is_comm_null: - self._mpi_type = find_mpi_type(dtype) + self._mpi_type = find_mpi_type(dtype) if isinstance(cart, InterfaceCartDecomposition): # TODO : Check if this line really change the ._shape self._shape = cart.get_interface_communication_infos(cart.axis)['gbuf_recv_shape'][0] else: - self._synchronizer = get_data_exchanger( cart, dtype , assembly=True, blocking=False) + self._synchronizer = get_data_exchanger(cart, dtype , assembly=True, blocking=False) + + # Select kernel for AXPY operation + if self._ndim in [1, 2, 3]: + self._axpy_func = kernels['axpy'][self._ndim] + else: + self._axpy_func = self._axpy_python + self._axpy_work = self.zeros() # work array + + # Select kernel for inner product + if self._ndim in [1, 2, 3]: + self._inner_func = kernels['inner'][self._ndim] + else: + self._inner_func = self._inner_python + + # Constant arguments for inner product: total number of ghost cells + self._inner_consts = tuple(np.int64(p * s) for p, s in zip(self._pads, self._shifts)) + + # TODO [YG, 06.09.2023]: print warning if pure Python functions are used + + #-------------------------------------- + # Pure Python methods for backup + #-------------------------------------- + def _axpy_python(self, a, x, y): + w = self._axpy_work + x.copy(out=w) # w <- x + w *= a # w <- a * x + y += w # y <- a * x + y + + @staticmethod + def _inner_python(v1, v2, nghost): + index = tuple(slice(ng, -ng) for ng in nghost) + return np.vdot(v1[index].flat, v2[index].flat) #-------------------------------------- # Abstract interface #-------------------------------------- @property - def dimension( self ): + def dimension(self): """ The dimension of a vector space V is the cardinality (i.e. the number of vectors) of a basis of V over its base field. """ - return np.prod( self._npts ) + return np.prod(self._npts) # ... @property - def dtype( self ): + def dtype(self): return self._dtype # ... - def zeros( self ): + def zeros(self): """ Get a copy of the null element of the StencilVectorSpace V. @@ -154,99 +212,138 @@ def zeros( self ): A new vector object with all components equal to zero. """ - return StencilVector( self ) + return StencilVector(self) + + # ... + def axpy(self, a, x, y): + """ + Increment the vector y with the a-scaled vector x, i.e. y = a * x + y, + provided that x and y belong to the same vector space V (self). + The scalar value a may be real or complex, depending on the field of V. + + Parameters + ---------- + a : scalar + The scaling coefficient needed for the operation. + + x : StencilVector + The vector which is not modified by this function. + + y : StencilVector + The vector modified by this function (incremented by a * x). + """ + assert isinstance(x, StencilVector) + assert isinstance(y, StencilVector) + assert x._space is self + assert y._space is self + + if self.dtype == complex: + a = complex(a) + else: + if isinstance(a, complex): + raise TypeError('A complex scalar was given in a real case') + else: + a = float(a) + + self._axpy_func(a, x._data, y._data) + + for axis, ext in self.interfaces: + self._axpy_func(a, x._interface_data[axis, ext], y._interface_data[axis, ext]) + + x._sync = x._sync and y._sync #-------------------------------------- # Other properties/methods #-------------------------------------- @property - def mpi_type( self ): + def mpi_type(self): return self._mpi_type @property - def shape( self ): + def shape(self): return self._shape @property - def parallel( self ): + def parallel(self): return self._parallel # ... @property - def cart( self ): + def cart(self): return self._cart # ... @property - def npts( self ): + def npts(self): return self._npts # ... @property - def starts( self ): + def starts(self): return self._starts # ... @property - def ends( self ): + def ends(self): return self._ends # ... @property - def parent_starts( self ): + def parent_starts(self): return self._parent_starts # ... @property - def parent_ends( self ): + def parent_ends(self): return self._parent_ends # ... @property - def pads( self ): + def pads(self): return self._pads # ... @property - def periods( self ): + def periods(self): return self._periods # ... @property - def shifts( self ): + def shifts(self): return self._shifts # ... @property - def ndim( self ): + def ndim(self): return self._ndim @property - def interfaces( self ): + def interfaces(self): return self._interfaces_readonly def set_interface(self, axis, ext, cart): - - """ Set the interface space along a given axis and extremity. + """ + Set the interface space along a given axis and extremity. Parameters ---------- - axis : int - The axis of the new Interface space. + axis : int + The axis of the new Interface space. - ext: int - The extremity of the new Interface space. - the values must be 1 or -1. + ext: {-1, 1} + The extremity of the new Interface space. - cart: CartDecomposition - The cart of the new space. + cart: CartDecomposition + The cart of the new space. """ - assert int(ext) in [-1,1] + assert int(ext) in [-1, 1] + assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition)) + + if cart.is_comm_null: + return # Create the interface space in the parallel case using the new cart - assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition)) - if cart.is_comm_null: return if isinstance(cart, InterfaceCartDecomposition): # Case where the patches that share the interface are owned by different intra-communicators space = StencilVectorSpace(cart, dtype=self.dtype) @@ -279,7 +376,7 @@ def set_interface(self, axis, ext, cart): self._interfaces[axis, ext] = space #=============================================================================== -class StencilVector( Vector ): +class StencilVector(Vector): """ Vector in n-dimensional stencil format. @@ -296,7 +393,7 @@ def __init__(self, V): self._space = V self._sizes = V.shape self._ndim = len(V.npts) - self._data = np.zeros( V.shape, dtype=V.dtype ) + self._data = np.zeros(V.shape, dtype=V.dtype) self._dot_send_data = np.zeros((1,), dtype=V.dtype) self._dot_recv_data = np.zeros((1,), dtype=V.dtype) self._interface_data = {} @@ -304,15 +401,16 @@ def __init__(self, V): # allocate data for the boundary that shares an interface for axis, ext in V.interfaces: - self._interface_data[axis, ext] = np.zeros( V.interfaces[axis, ext].shape, dtype=V.dtype ) + self._interface_data[axis, ext] = np.zeros(V.interfaces[axis, ext].shape, dtype=V.dtype) #prepare communications if V.cart.is_parallel and not V.cart.is_comm_null and isinstance(V.cart, CartDecomposition): self._requests = V._synchronizer.prepare_communications(self._data) # TODO: distinguish between different directions - self._sync = False + self._sync = False + #... def __del__(self): # Release memory of persistent MPI communication channels if self._requests: @@ -342,33 +440,32 @@ def dot(self, v): Parameters ---------- v : StencilVector - Vector of the same space than self needed for the scalar product + Vector of the same space than self needed for the scalar product. Returns ------- null: self._space.dtype - Scalar containing scalar product of v and self + Scalar containing scalar product of v and self. """ assert isinstance(v, StencilVector) assert v._space is self._space + inner_func = self._space._inner_func + inner_args = (self._data, v._data, *self._space._inner_consts) + if self._space.parallel: - self._dot_send_data[0] = self._dot(self._data, v._data , self._space.pads, self._space.shifts) + # Sometimes in the parallel case, we can get an empty vector that breaks our kernel + self._dot_send_data[0] = 0 if self._data.shape[0] == 0 else inner_func(*inner_args) self._space.cart.global_comm.Allreduce((self._dot_send_data, self._space.mpi_type), (self._dot_recv_data, self._space.mpi_type), op=MPI.SUM ) return self._dot_recv_data[0] else: - return self._dot(self._data, v._data , self._space.pads, self._space.shifts) + return inner_func(*inner_args) #... - @staticmethod - def _dot(v1, v2, pads, shifts): - index = tuple( slice(m*p,-m*p) for p,m in zip(pads, shifts)) - return np.vdot(v1[index].flat, v2[index].flat) - def conjugate(self, out=None): if out is not None: assert isinstance(out, StencilVector) @@ -590,6 +687,7 @@ def _toarray_parallel_with_pads(self, order='C'): # Step 4: return flattened array return out.flatten( order=order) + #... def topetsc(self): """ Convert to petsc data structure. """ @@ -637,8 +735,8 @@ def update_ghost_regions(self): if self.space.parallel: if not self.space.cart.is_comm_null: # PARALLEL CASE: fill in ghost regions with data from neighbors - self.space._synchronizer.start_update_ghost_regions( self._data, self._requests ) - self.space._synchronizer.end_update_ghost_regions( self._data, self._requests ) + self.space._synchronizer.start_update_ghost_regions(self._data, self._requests) + self.space._synchronizer. end_update_ghost_regions(self._data, self._requests) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._update_ghost_regions_serial() @@ -647,14 +745,15 @@ def update_ghost_regions(self): if self.space.parallel: for axis, ext in self.space.interfaces: - V = self.space.interfaces[axis, ext] - if isinstance(V.cart, InterfaceCartDecomposition):continue + V = self.space.interfaces[axis, ext] + if isinstance(V.cart, InterfaceCartDecomposition): + continue slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)] self._interface_data[axis, ext][...] = self._data[tuple(slices)] else: for axis, ext in self.space.interfaces: - V = self.space.interfaces[axis, ext] + V = self.space.interfaces[axis, ext] slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)] self._interface_data[axis, ext][...] = self._data[tuple(slices)] @@ -667,29 +766,29 @@ def _update_ghost_regions_serial(self): ndim = self._space.ndim for direction in range(ndim): periodic = self._space.periods[direction] - p = self._space.pads [direction]*self._space.shifts[direction] + p = self._space.pads [direction] * self._space.shifts[direction] - idx_front = [slice(None)]*direction - idx_back = [slice(None)]*(ndim-direction-1) + idx_front = [slice(None)] * direction + idx_back = [slice(None)] * (ndim-direction-1) if periodic: # Copy data from left to right - idx_from = tuple( idx_front + [slice( p, 2*p)] + idx_back ) - idx_to = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) + idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left - idx_from = tuple( idx_front + [slice(-2*p,-p)] + idx_back ) - idx_to = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) + idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero - idx_ghost = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero - idx_ghost = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ... @@ -700,22 +799,22 @@ def exchange_assembly_data(self): if self.space.parallel and not self.space.cart.is_comm_null: # PARALLEL CASE: fill in ghost regions with data from neighbors - self.space._synchronizer.start_exchange_assembly_data( self._data ) - self.space._synchronizer.end_exchange_assembly_data( self._data ) + self.space._synchronizer.start_exchange_assembly_data(self._data) + self.space._synchronizer. end_exchange_assembly_data(self._data) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._exchange_assembly_data_serial() ndim = self._space.ndim for direction in range(ndim): - idx_front = [slice(None)]*direction - idx_back = [slice(None)]*(ndim-direction-1) + idx_front = [slice(None)] * direction + idx_back = [slice(None)] * (ndim-direction-1) - p = self._space.pads [direction] + p = self._space.pads [direction] m = self._space.shifts[direction] - idx_from = tuple( idx_front + [ slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back ) + idx_from = tuple(idx_front + [slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back) self._data[idx_from] = 0. - idx_from = tuple( idx_front + [ slice(0,m*p)] + idx_back ) + idx_from = tuple(idx_front + [slice(0,m*p)] + idx_back) self._data[idx_from] = 0. # ... @@ -726,15 +825,15 @@ def _exchange_assembly_data_serial(self): periodic = self._space.periods[direction] p = self._space.pads [direction] - m = self._space.shifts[direction] + m = self._space.shifts [direction] if periodic: - idx_front = [slice(None)]*direction - idx_back = [slice(None)]*(ndim-direction-1) + idx_front = [slice(None)] * direction + idx_back = [slice(None)] * (ndim-direction-1) # Copy data from left to right - idx_to = tuple( idx_front + [slice( m*p, m*p+p)] + idx_back ) - idx_from = tuple( idx_front + [ slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back ) + idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back) + idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back) self._data[idx_to] += self._data[idx_from] #-------------------------------------- @@ -743,8 +842,7 @@ def _exchange_assembly_data_serial(self): def _getindex(self, key): # TODO: check if we should ignore padding elements - - if not isinstance( key, tuple ): + if not isinstance(key, tuple): key = (key,) index = [] for (i,s,p,m) in zip(key, self.starts, self.pads,self.space.shifts): @@ -758,7 +856,7 @@ def _getindex(self, key): return tuple(index) #=============================================================================== -class StencilMatrix( LinearOperator ): +class StencilMatrix(LinearOperator): """ Matrix in n-dimensional stencil format. @@ -774,12 +872,11 @@ class StencilMatrix( LinearOperator ): W : psydac.linalg.stencil.StencilVectorSpace Codomain of the new linear operator. - """ - def __init__( self, V, W, pads=None , backend=None): + def __init__(self, V, W, pads=None, backend=None): - assert isinstance( V, StencilVectorSpace ) - assert isinstance( W, StencilVectorSpace ) + assert isinstance(V, StencilVectorSpace) + assert isinstance(W, StencilVectorSpace) assert W.pads == V.pads if not W.dtype==V.dtype: raise NotImplementedError("The domain and the codomain should have the same data type.") @@ -791,10 +888,10 @@ def __init__( self, V, W, pads=None , backend=None): self._pads = pads or tuple(V.pads) dims = list(W.shape) diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, V.shifts, W.shifts)] - self._data = np.zeros( dims+diags, dtype=W.dtype ) + self._data = np.zeros(dims+diags, dtype=W.dtype) self._domain = V self._codomain = W - self._ndim = len( dims ) + self._ndim = len(dims) self._backend = backend self._is_T = False self._diag_indices = None @@ -815,53 +912,52 @@ def __init__( self, V, W, pads=None , backend=None): self._sync = False # Prepare the arguments for the dot product method - nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(V.starts, V.ends, V.shifts, W.shifts, self._pads, V.pads)] - nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, V.shifts, self._pads)] + nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(V.starts, V.ends, V.shifts, W.shifts, self._pads, V.pads)] + nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, V.shifts, self._pads)] # Number of rows in matrix (along each dimension) - nrows = [min(ni,nj) for ni,nj in zip(nc, nd)] - nrows_extra = [max(0,ni-nj) for ni,nj in zip(nc, nd)] - - args = {} - args['starts'] = tuple(V.starts) - args['nrows'] = tuple(nrows) - args['nrows_extra'] = tuple(nrows_extra) - args['gpads'] = tuple(V.pads) - args['pads'] = tuple(self._pads) - args['dm'] = tuple(V.shifts) - args['cm'] = tuple(W.shifts) + nrows = [min(ni, nj) for ni,nj in zip(nc, nd)] + nrows_extra = [max(0, ni-nj) for ni,nj in zip(nc, nd)] + + args = {} + args['starts'] = tuple(V.starts) + args['nrows'] = tuple(nrows) + args['nrows_extra'] = tuple(nrows_extra) + args['gpads'] = tuple(V.pads) + args['pads'] = tuple(self._pads) + args['dm'] = tuple(V.shifts) + args['cm'] = tuple(W.shifts) + ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(self._pads, W.shifts, V.shifts)])) + args['pad_imp'] = [gp*m+gp+1-n-s%m+p-gp for gp,m,n,s,p in zip(V.pads, V.shifts, ndiags, V.starts, self._pads)] + args['ndiags'] = ndiags self._dotargs_null = args - self._args = args.copy() - self._func = self._dot + self._dot = kernels['matvec'][self._ndim] self._transpose_args = self._prepare_transpose_args() - self._transpose_func = eval(f'transpose_{self._ndim}d') - if backend is None: - backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND')) + self._transpose_func = kernels['transpose'][self._ndim] - if backend: - self.set_backend(backend) + self.set_backend(backend) #-------------------------------------- # Abstract interface #-------------------------------------- @property - def domain( self ): + def domain(self): return self._domain # ... @property - def codomain( self ): + def codomain(self): return self._codomain # ... @property - def dtype( self ): + def dtype(self): return self._domain.dtype # ... - def dot( self, v, out=None): + def dot(self, v, out=None): """ Return the matrix/vector product between self and v. This function optimized this product. @@ -869,20 +965,18 @@ def dot( self, v, out=None): Parameters ---------- v : StencilVector - Vector of the domain of self needed for the Matrix/Vector product + Vector of the domain of self needed for the Matrix/Vector product. out : StencilVector - Vector of the codomain of self + Vector of the codomain of self. Returns ------- out : StencilVector - Vector of the codomain of self, contain the result of the product - + Vector of the codomain of self, contain the result of the product. """ - - assert isinstance( v, StencilVector ) + assert isinstance(v, StencilVector) assert v.space is self.domain if out is not None: @@ -901,6 +995,7 @@ def dot( self, v, out=None): out.ghost_regions_in_sync = False return out + # ... def vdot( self, v, out=None): """ Return the matrix/vector product between the conjugate of self and v. @@ -918,14 +1013,13 @@ def vdot( self, v, out=None): ------- out : StencilVector Vector of the codomain of self, contain the result of the product - """ - assert isinstance( v, StencilVector ) + assert isinstance(v, StencilVector) assert v.space is self.domain if out is not None: - assert isinstance( out, StencilVector ) + assert isinstance(out, StencilVector) assert out.space is self.codomain else: out = StencilVector( self.codomain ) @@ -942,49 +1036,6 @@ def vdot( self, v, out=None): out.ghost_regions_in_sync = False return out - # ... - @staticmethod - def _dot(mat, x, out, starts, nrows, nrows_extra, gpads, pads, dm, cm): - - # Index for k=i-j - ndim = len(x.shape) - kk = [slice(None)]*ndim - - # pads are <= gpads - diff = [gp-p for gp,p in zip(gpads, pads)] - - ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(pads,cm,dm)])) - - bb = [p*m+p+1-n-s%m for p,m,n,s in zip(gpads, dm, ndiags, starts)] - - for xx in np.ndindex( *nrows ): - - ii = tuple( mi*pi + x for mi,pi,x in zip(cm, gpads, xx) ) - jj = tuple( slice(b-d+(x+s%mj)//mi*mj,b-d+(x+s%mj)//mi*mj+n) for x,mi,mj,b,s,n,d in zip(xx,cm,dm,bb,starts,ndiags,diff) ) - ii_kk = tuple( list(ii) + kk ) - out[ii] = np.dot( mat[ii_kk].flat, x[jj].flat ) - - new_nrows = list(nrows).copy() - - for d,er in enumerate(nrows_extra): - - rows = new_nrows.copy() - del rows[d] - - for n in range(er): - for xx in np.ndindex(*rows): - xx = list(xx) - xx.insert(d, nrows[d]+n) - - ii = tuple(mi*pi + x for mi,pi,x in zip(cm, gpads, xx)) - ee = [max(x-l+1,0) for x,l in zip(xx, nrows)] - jj = tuple( slice(b-d+(x+s%mj)//mi*mj, b-d+(x+s%mj)//mi*mj+n-e) for x,mi,mj,d,e,b,s,n in zip(xx, cm, dm, diff, ee,bb,starts, ndiags) ) - kk = [slice(None,n-e) for n,e in zip(ndiags, ee)] - ii_kk = tuple( list(ii) + kk ) - out[ii] = np.dot( mat[ii_kk].flat, x[jj].flat ) - - new_nrows[d] += er - # ... def transpose(self, conjugate=False): """ Create new StencilMatrix Mt, where domain and codomain are swapped @@ -1008,7 +1059,7 @@ def transpose(self, conjugate=False): return Mt # ... - def toarray( self, **kwargs ): + def toarray(self, **kwargs): """ Convert to Numpy 2D array. """ order = kwargs.pop('order', 'C') @@ -1022,7 +1073,7 @@ def toarray( self, **kwargs ): return coo.toarray() # ... - def tosparse( self, **kwargs ): + def tosparse(self, **kwargs): """ Convert to any Scipy sparse matrix format. """ order = kwargs.pop('order', 'C') @@ -1042,8 +1093,8 @@ def __neg__(self): return self.__mul__(-1) # ... - def __mul__( self, a ): - w = StencilMatrix( self._domain, self._codomain, self._pads, self._backend ) + def __mul__(self, a): + w = StencilMatrix(self._domain, self._codomain, self._pads, self._backend) w._data = self._data * a w._func = self._func w._args = self._args @@ -1116,12 +1167,12 @@ def conj(self, out=None): # ... @property - def pads( self ): + def pads(self): return self._pads # ... @property - def backend( self ): + def backend(self): return self._backend # ... @@ -1135,11 +1186,11 @@ def __setitem__(self, key, value): self._data[index] = value #... - def max( self ): + def max(self): return self._data.max() #... - def copy( self ): + def copy(self): M = StencilMatrix( self.domain, self.codomain, self._pads, self._backend ) M._data[:] = self._data[:] M._func = self._func @@ -1178,7 +1229,7 @@ def __isub__(self, m): return LinearOperator.__sub__(self, m) #... - def __abs__( self ): + def __abs__(self): w = StencilMatrix( self._domain, self._codomain, self._pads, self._backend ) w._data = abs(self._data) w._func = self._func @@ -1187,7 +1238,7 @@ def __abs__( self ): return w #... - def remove_spurious_entries( self ): + def remove_spurious_entries(self): """ If any dimension is NOT periodic, make sure that the corresponding periodic corners are set to zero. @@ -1226,7 +1277,7 @@ def remove_spurious_entries( self ): self[index] = 0 # ... - def update_ghost_regions( self ): + def update_ghost_regions(self): """ Update ghost regions before performing non-local access to matrix elements (e.g. in matrix transposition). @@ -1247,7 +1298,7 @@ def update_ghost_regions( self ): self._sync = True # ... - def exchange_assembly_data( self ): + def exchange_assembly_data(self): """ Exchange assembly data. """ @@ -1275,7 +1326,7 @@ def exchange_assembly_data( self ): self._data[idx_from] = 0. # ... - def _exchange_assembly_data_serial( self ): + def _exchange_assembly_data_serial(self): ndim = self._codomain.ndim for direction in range(ndim): @@ -1316,19 +1367,18 @@ def diagonal(self): return self._data[self._diag_indices].reshape(nrows) # ... - def topetsc( self ): - """ Convert to petsc data structure. + def topetsc(self): + """ Convert to PETSc data structure. """ from psydac.linalg.topetsc import mat_topetsc - mat = mat_topetsc( self ) + mat = mat_topetsc(self) return mat #-------------------------------------- # Private methods #-------------------------------------- - # ... - def _getindex( self, key ): + def _getindex(self, key): nd = self._ndim ii = key[:nd] @@ -1347,7 +1397,7 @@ def _getindex( self, key ): # ... @staticmethod - def _shift_index( index, shift ): + def _shift_index(index, shift): if isinstance( index, slice ): start = None if index.start is None else index.start + shift stop = None if index.stop is None else index.stop + shift @@ -1355,7 +1405,7 @@ def _shift_index( index, shift ): else: return index + shift - def tocoo_local( self, order='C' ): + def tocoo_local(self, order='C'): # Shortcuts sc = self._codomain.starts @@ -1410,7 +1460,7 @@ def tocoo_local( self, order='C' ): return M #... - def _tocoo_no_pads( self , order='C'): + def _tocoo_no_pads(self , order='C'): # Shortcuts nr = self._codomain.npts @@ -1441,8 +1491,7 @@ def _tocoo_no_pads( self , order='C'): cpads = [np.int64(i) for i in cpads] pp = [np.int64(i) for i in pp] - func = 'stencil2coo_{dim}d_{order}'.format(dim=nd, order=order) - stencil2coo = eval(func) + stencil2coo = kernels['stencil2coo'][order][nd] ind = stencil2coo(self._data, data, rows, cols, *nrl, *ncl, *ss, *nr, *nc, *dm, *cm, *cpads, *pp) M = coo_matrix( @@ -1452,7 +1501,7 @@ def _tocoo_no_pads( self , order='C'): return M #... - def _tocoo_parallel_with_pads( self , order='C'): + def _tocoo_parallel_with_pads(self , order='C'): # If necessary, update ghost regions if not self.ghost_regions_in_sync: @@ -1539,21 +1588,21 @@ def _tocoo_parallel_with_pads( self , order='C'): # ... @property - def ghost_regions_in_sync( self ): + def ghost_regions_in_sync(self): return self._sync # ... # NOTE: this property must be set collectively @ghost_regions_in_sync.setter - def ghost_regions_in_sync( self, value ): - assert isinstance( value, bool ) + def ghost_regions_in_sync(self, value): + assert isinstance(value, bool) self._sync = value # ... - def _update_ghost_regions_serial( self ): + def _update_ghost_regions_serial(self): - ndim = self._codomain.ndim - for direction in range( self._codomain.ndim ): + ndim = self._codomain.ndim + for direction in range(self._codomain.ndim): periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] @@ -1564,23 +1613,23 @@ def _update_ghost_regions_serial( self ): if periodic: # Copy data from left to right - idx_from = tuple( idx_front + [slice( p, 2*p)] + idx_back ) - idx_to = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) + idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left - idx_from = tuple( idx_front + [slice(-2*p,-p)] + idx_back ) - idx_to = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) + idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero - idx_ghost = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero - idx_ghost = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ... @@ -1593,21 +1642,21 @@ def _prepare_transpose_args(self): eec = W.ends ssd = V.starts eed = V.ends - pads = self._pads + pads = self._pads gpads = V.pads dm = V.shifts cm = W.shifts # Number of rows in the transposed matrix (along each dimension) - nrows = [e-s+1 for s, e in zip(ssd, eed)] - ncols = [e-s+2*m*p+1 for s, e, m, p in zip(ssc, eec, cm, gpads)] + nrows = [e-s+1 for s, e in zip(ssd, eed)] + ncols = [e-s+2*m*p+1 for s, e, m, p in zip(ssc, eec, cm, gpads)] pp = pads ndiags, starts = list(zip(*[compute_diag_len(p, mi, mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) ndiagsT, _ = list(zip(*[compute_diag_len(p, mj, mi, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) - diff = [gp-p for gp, p in zip(gpads, pp)] + diff = [gp-p for gp, p in zip(gpads, pp)] sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\ for s, p, mi, mj in zip(starts, pp, cm, dm)] @@ -1632,20 +1681,23 @@ def _prepare_transpose_args(self): args['cm'] = np.int64(cm) args['nd'] = np.int64(ndiags) args['ndT'] = np.int64(ndiagsT) - args['si'] = np.int64(si) - args['sk'] = np.int64(sk) - args['sl'] = np.int64(sl) + args['si'] = np.int64(si) + args['sk'] = np.int64(sk) + args['sl'] = np.int64(sl) return args # ... def set_backend(self, backend): from psydac.api.ast.linalg import LinearOperatorDot - self._backend = backend - self._args = self._dotargs_null.copy() + self._backend = backend + self._args = self._dotargs_null.copy() if self._backend is None: - self._func = self._dot + for key, arg in self._args.items(): + self._args[key] = np.int64(arg) + self._func = self._dot + self._args.pop('pads') else: if self.domain.parallel: comm = self.codomain.cart.comm @@ -1730,6 +1782,8 @@ def set_backend(self, backend): self._args.pop('dm') self._args.pop('cm') + self._args.pop('pad_imp') + self._args.pop('ndiags') self._func = dot.func #=============================================================================== @@ -1737,8 +1791,8 @@ def set_backend(self, backend): # - Check if StencilMatrix should be subclassed # - Reimplement magic methods (some are simply copied from StencilMatrix) def flip_axis(index, n): - s = n-index.start-1 - e = n-index.stop-1 if n>index.stop else None + s = n - index.start-1 + e = n - index.stop-1 if n > index.stop else None return slice(s,e,-1) class StencilInterfaceMatrix(LinearOperator): @@ -1783,10 +1837,10 @@ class StencilInterfaceMatrix(LinearOperator): Padding of the linear operator. """ - def __init__( self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, pads=None, backend=None ): + def __init__(self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, pads=None, backend=None): - assert isinstance( V, StencilVectorSpace ) - assert isinstance( W, StencilVectorSpace ) + assert isinstance(V, StencilVectorSpace) + assert isinstance(W, StencilVectorSpace) assert W.pads == V.pads Vin = V.interfaces[d_axis, d_ext] @@ -1795,17 +1849,17 @@ def __init__( self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, for p,vp in zip(pads, Vin.pads): assert p<=vp - self._pads = pads or tuple(Vin.pads) - dims = list(W.shape) + self._pads = pads or tuple(Vin.pads) + dims = list(W.shape) if W.parent_ends[c_axis] is not None: diff = min(1, W.parent_ends[c_axis]-W.ends[c_axis]) else: diff = 0 - dims[c_axis] = W.pads[c_axis] + 1-diff + 2*W.shifts[c_axis]*W.pads[c_axis] - diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, Vin.shifts, W.shifts)] - self._data = np.zeros( dims+diags, dtype=W.dtype ) + dims[c_axis] = W.pads[c_axis] + 1-diff + 2*W.shifts[c_axis]*W.pads[c_axis] + diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, Vin.shifts, W.shifts)] + self._data = np.zeros(dims + diags, dtype=W.dtype) # Parallel attributes if W.parallel and not isinstance(W.cart, InterfaceCartDecomposition): @@ -1831,13 +1885,13 @@ def __init__( self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, self._codomain_ext = c_ext self._domain_start = s_d self._codomain_start = s_c - self._ndim = len( dims ) - self._backend = None + self._ndim = len(dims) + self._backend = None # Prepare the arguments for the dot product method - nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(Vin.starts, Vin.ends, Vin.shifts, W.shifts, self._pads, Vin.pads)] - nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, Vin.shifts, self._pads)] + nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(Vin.starts, Vin.ends, Vin.shifts, W.shifts, self._pads, Vin.pads)] + nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, Vin.shifts, self._pads)] # Number of rows in matrix (along each dimension) nrows = [min(ni,nj) for ni,nj in zip(nc, nd)] @@ -1846,26 +1900,26 @@ def __init__( self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, nrows[c_axis] = W.pads[c_axis] + 1-diff-nrows_extra[c_axis] - args = {} - args['starts'] = tuple(Vin.starts) - args['nrows'] = tuple(nrows) - args['nrows_extra'] = tuple(nrows_extra) - args['gpads'] = tuple(Vin.pads) - args['pads'] = tuple(self._pads) - args['dm'] = tuple(Vin.shifts) - args['cm'] = tuple(W.shifts) - args['c_axis'] = c_axis - args['d_start'] = self._domain_start - args['c_start'] = self._codomain_start - args['flip'] = self._flip - args['permutation'] = self._permutation + args = {} + args['starts'] = tuple(Vin.starts) + args['nrows'] = tuple(nrows) + args['nrows_extra'] = tuple(nrows_extra) + args['gpads'] = tuple(Vin.pads) + args['pads'] = tuple(self._pads) + args['dm'] = tuple(Vin.shifts) + args['cm'] = tuple(W.shifts) + args['c_axis'] = c_axis + args['d_start'] = self._domain_start + args['c_start'] = self._codomain_start + args['flip'] = self._flip + args['permutation'] = self._permutation self._dotargs_null = args self._args = args.copy() self._func = self._dot self._transpose_args = self._prepare_transpose_args() - self._transpose_func = eval(f'interface_transpose_{self._ndim}d') + self._transpose_func = kernels['interface_transpose'][self._ndim] if backend is None: backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND')) @@ -1880,29 +1934,29 @@ def __init__( self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, # Abstract interface #-------------------------------------- @property - def domain( self ): + def domain(self): return self._domain # ... @property - def codomain( self ): + def codomain(self): return self._codomain # ... @property - def dtype( self ): + def dtype(self): return self.domain.dtype # ... - def dot( self, v, out=None ): + def dot(self, v, out=None): - assert isinstance( v, StencilVector ) + assert isinstance(v, StencilVector) assert v.space is self.domain # Necessary if vector space is distributed across processes if out is not None: - assert isinstance( out, StencilVector ) + assert isinstance(out, StencilVector) assert out.space is self.codomain out[(slice(None,None),)*v.space.ndim] = 0. else: @@ -1922,10 +1976,10 @@ def dot( self, v, out=None ): def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d_start, c_start, flip, permutation): # Index for k=i-j - nrows = list(nrows) - ndim = len(v.shape) - kk = [slice(None)]*ndim - diff = [xp-p for xp,p in zip(gpads, pads)] + nrows = list(nrows) + ndim = len(v.shape) + kk = [slice(None)]*ndim + diff = [xp-p for xp,p in zip(gpads, pads)] ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(pads,cm,dm)])) bb = [p*m+p+1-n-s%m for p,m,n,s in zip(gpads, dm, ndiags, starts)] @@ -2003,8 +2057,8 @@ def _prepare_transpose_args(self): dim = self._codomain_axis # Number of rows in the transposed matrix (along each dimension) - nrows = [e-s+1 for s,e in zip(ssd, eed)] - ncols = [e-s+1+2*m*p for s, e, m, p in zip(ssc, eec, cm, gpads)] + nrows = [e-s+1 for s,e in zip(ssd, eed)] + ncols = [e-s+1+2*m*p for s, e, m, p in zip(ssc, eec, cm, gpads)] pp = pads ndiags, starts = list(zip(*[compute_diag_len(p,mi,mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) @@ -2040,8 +2094,7 @@ def _prepare_transpose_args(self): nrows[dim] = pads[dim] + 1 - diff_r ncols[dim] = pads[dim] + 1 - diff_c + 2*cm[dim]*pads[dim] - - args={} + args = {} args['n'] = np.int64(nrows) args['nc'] = np.int64(ncols) args['gp'] = np.int64(gpads) @@ -2050,12 +2103,14 @@ def _prepare_transpose_args(self): args['cm'] = np.int64(cm) args['nd'] = np.int64(ndiags) args['ndT'] = np.int64(ndiagsT) - args['si'] = np.int64(si) - args['sk'] = np.int64(sk) - args['sl'] = np.int64(sl) + args['si'] = np.int64(si) + args['sk'] = np.int64(sk) + args['sl'] = np.int64(sl) + return args + # ... - def toarray( self, **kwargs ): + def toarray(self, **kwargs): order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) @@ -2068,7 +2123,7 @@ def toarray( self, **kwargs ): return coo.toarray() # ... - def tosparse( self, **kwargs ): + def tosparse(self, **kwargs): order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) @@ -2081,7 +2136,7 @@ def tosparse( self, **kwargs ): return coo #... - def copy( self ): + def copy(self): M = StencilInterfaceMatrix( self._domain, self._codomain, self._domain_start, self._codomain_start, self._domain_axis, self._codomain_axis, @@ -2096,7 +2151,7 @@ def __neg__(self): return self.__mul__(-1) #... - def __mul__( self, a ): + def __mul__(self, a): w = self.copy() w._data *= a w._sync = self._sync @@ -2128,52 +2183,52 @@ def __isub__(self, m): # ... @property - def domain_axis( self ): + def domain_axis(self): return self._domain_axis # ... @property - def codomain_axis( self ): + def codomain_axis(self): return self._codomain_axis # ... @property - def domain_ext( self ): + def domain_ext(self): return self._domain_ext # ... @property - def codomain_ext( self ): + def codomain_ext(self): return self._codomain_ext # ... @property - def domain_start( self ): + def domain_start(self): return self._domain_start # ... @property - def codomain_start( self ): + def codomain_start(self): return self._codomain_start # ... @property - def dim( self ): + def dim(self): return self._ndim # ... @property - def flip( self ): + def flip(self): return self._flip # ... @property - def permutation( self ): + def permutation(self): return self._permutation # ... @property - def pads( self ): + def pads(self): return self._pads # ... @@ -2187,18 +2242,18 @@ def __setitem__(self, key, value): self._data[index] = value #... - def max( self ): + def max(self): return self._data.max() # ... @property - def backend( self ): + def backend(self): return self._backend #-------------------------------------- # Private methods #-------------------------------------- - def _getindex( self, key ): + def _getindex(self, key): nd = self._ndim ii = key[:nd] @@ -2206,20 +2261,20 @@ def _getindex( self, key ): index = [] - for i,s,p in zip( ii, self._codomain.starts, self._codomain.pads ): - x = self._shift_index( i, p-s ) - index.append( x ) + for i,s,p in zip(ii, self._codomain.starts, self._codomain.pads): + x = self._shift_index(i, p-s) + index.append(x) - for k,p in zip( kk, self._pads ): - l = self._shift_index( k, p ) - index.append( l ) + for k,p in zip(kk, self._pads): + l = self._shift_index(k, p) + index.append(l) return tuple(index) # ... @staticmethod - def _shift_index( index, shift ): - if isinstance( index, slice ): + def _shift_index(index, shift): + if isinstance(index, slice): start = None if index.start is None else index.start + shift stop = None if index.stop is None else index.stop + shift return slice(start, stop, index.step) @@ -2227,7 +2282,7 @@ def _shift_index( index, shift ): return index + shift #... - def _tocoo_no_pads( self ): + def _tocoo_no_pads(self): # Shortcuts nr = self.codomain.npts nc = self.domain.npts @@ -2273,12 +2328,12 @@ def _tocoo_no_pads( self ): jj = [jj[i] for i in permutation] - I = ravel_multi_index( ii, dims=nr, order='C' ) - J = ravel_multi_index( jj, dims=nc, order='C' ) + I = ravel_multi_index(ii, dims=nr, order='C') + J = ravel_multi_index(jj, dims=nc, order='C') - rows.append( I ) - cols.append( J ) - data.append( value ) + rows.append(I) + cols.append(J) + data.append(value) M = coo_matrix( (data,(rows,cols)), @@ -2289,55 +2344,55 @@ def _tocoo_no_pads( self ): # ... @property - def ghost_regions_in_sync( self ): + def ghost_regions_in_sync(self): return self._sync # ... # NOTE: this property must be set collectively @ghost_regions_in_sync.setter - def ghost_regions_in_sync( self, value ): - assert isinstance( value, bool ) + def ghost_regions_in_sync(self, value): + assert isinstance(value, bool) self._sync = value # ... - def _update_ghost_regions_serial( self, direction: int ): + def _update_ghost_regions_serial(self, direction: int): if direction is None: - for d in range( self._codomain.ndim ): - self._update_ghost_regions_serial( d ) + for d in range(self._codomain.ndim): + self._update_ghost_regions_serial(d) return ndim = self._codomain.ndim periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] - idx_front = [slice(None)]*direction - idx_back = [slice(None)]*(ndim-direction-1 + ndim) + idx_front = [slice(None)] * direction + idx_back = [slice(None)] * (ndim-direction-1) if periodic: # Copy data from left to right - idx_from = tuple( idx_front + [slice( p, 2*p)] + idx_back ) - idx_to = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) + idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left - idx_from = tuple( idx_front + [slice(-2*p,-p)] + idx_back ) - idx_to = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) + idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero - idx_ghost = tuple( idx_front + [slice(None, p)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero - idx_ghost = tuple( idx_front + [slice(-p,None)] + idx_back ) + idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ... - def exchange_assembly_data( self ): + def exchange_assembly_data(self): """ Exchange assembly data. """ @@ -2346,38 +2401,41 @@ def exchange_assembly_data( self ): if self._codomain.parallel: # PARALLEL CASE: fill in ghost regions with data from neighbors - self._synchronizer.start_exchange_assembly_data( self._data ) - self._synchronizer.end_exchange_assembly_data( self._data ) + self._synchronizer.start_exchange_assembly_data(self._data) + self._synchronizer. end_exchange_assembly_data(self._data) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._exchange_assembly_data_serial() # ... - def _exchange_assembly_data_serial( self ): + def _exchange_assembly_data_serial(self): - ndim = self._codomain.ndim + ndim = self._codomain.ndim for direction in range(ndim): - if direction == self._codomain_axis:continue + if direction == self._codomain_axis: + continue periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] - m = self._codomain.shifts[direction] + m = self._codomain.shifts [direction] if periodic: - idx_front = [slice(None)]*direction - idx_back = [slice(None)]*(ndim-direction-1) + idx_front = [slice(None)] * direction + idx_back = [slice(None)] * (ndim-direction-1) # Copy data from left to right - idx_to = tuple( idx_front + [slice( m*p, m*p+p)] + idx_back ) - idx_from = tuple( idx_front + [ slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back ) + idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back) + idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p, None)] + idx_back) self._data[idx_to] += self._data[idx_from] + # ... def set_backend(self, backend): from psydac.api.ast.linalg import LinearOperatorDot - self._backend = backend - self._args = self._dotargs_null.copy() + + self._backend = backend + self._args = self._dotargs_null.copy() if self._backend is None: - self._func = self._dot + self._func = self._dot else: if self.domain.parallel: @@ -2439,10 +2497,10 @@ def set_backend(self, backend): self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) for i in range(len(nrows)): - self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) + self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) for i in range(len(nrows)): - self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) + self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) else: dot = LinearOperatorDot(self._ndim, @@ -2469,5 +2527,4 @@ def set_backend(self, backend): self._func = dot.func #=============================================================================== -from psydac.api.settings import PSYDAC_BACKENDS del VectorSpace, Vector diff --git a/psydac/linalg/tests/test_block.py b/psydac/linalg/tests/test_block.py index c556ba4a4..c67f49262 100644 --- a/psydac/linalg/tests/test_block.py +++ b/psydac/linalg/tests/test_block.py @@ -299,30 +299,31 @@ def test_2D_block_diagonal_solver_serial_init( dtype, n1, n2, p1, p2, P1, P2 ): assert L3.blocks == (M1, M2) assert L3.n_blocks == 2 #=============================================================================== +@pytest.mark.parametrize( 'dtype', [float, complex] ) @pytest.mark.parametrize( 'ndim', [1, 2, 3] ) @pytest.mark.parametrize( 'p', [1, 2] ) @pytest.mark.parametrize( 'P1', [True, False] ) @pytest.mark.parametrize( 'P2', [True, False] ) @pytest.mark.parametrize( 'P3', [True] ) -def test_block_serial_dimension( ndim, p, P1, P2, P3 ): +def test_block_serial_dimension( ndim, p, P1, P2, P3, dtype ): - if ndim==1: - npts=[12] - ps=[p] - Ps=[P1] - shifts=[1] + if ndim == 1: + npts = [12] + ps = [p] + Ps = [P1] + shifts = [1] - elif ndim==2: - npts=[12,15] - ps=[p,p] - Ps=[P1,P2] - shifts=[1,1] + elif ndim == 2: + npts =[12, 15] + ps = [p, p] + Ps = [P1, P2] + shifts = [1, 1] else: - npts=[12,15,9] - ps=[p,p,p] - Ps=[P1,P2,P3] - shifts=[1,1,1] + npts = [12, 15, 9] + ps = [p, p, p] + Ps = [P1, P2, P3] + shifts = [1, 1, 1] # set seed for reproducibility D = DomainDecomposition(npts, periods=Ps) @@ -333,7 +334,11 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3 ): cart = CartDecomposition(D, npts, global_starts, global_ends, pads=ps, shifts=shifts) # Create vector spaces, stencil matrices, and stencil vectors - V = StencilVectorSpace( cart) + V = StencilVectorSpace( cart, dtype=dtype) + if dtype==complex: + cst=1j + else: + cst=1 x1 = StencilVector( V ) x2 = StencilVector( V ) @@ -345,22 +350,22 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3 ): # Fill in vector with random values, then update ghost regions if ndim==1: - x1[:] = 2.0*np.random.random((npts[0]+2*p)) - x2[:] = 5.0*np.random.random((npts[0]+2*p)) - y1[:] = 2.0*np.random.random((npts[0]+2*p)) - y2[:] = 3.0*np.random.random((npts[0]+2*p)) + x1[:] = cst*2.0*np.random.random((npts[0]+2*p)) + x2[:] = cst*5.0*np.random.random((npts[0]+2*p)) + y1[:] = cst*2.0*np.random.random((npts[0]+2*p)) + y2[:] = cst*3.0*np.random.random((npts[0]+2*p)) elif ndim==2: - x1[:,:] = 2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - x2[:,:] = 5.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - y1[:,:] = 2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - y2[:,:] = 3.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) + x1[:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) + x2[:,:] = cst*5.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) + y1[:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) + y2[:,:] = cst*3.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) else: - x1[:,:,:] = 2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - x2[:,:,:] = 5.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - y1[:,:,:] = 2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - y2[:,:,:] = 3.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + x1[:,:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + x2[:,:,:] = cst*5.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + y1[:,:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + y2[:,:,:] = cst*3.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) x1.update_ghost_regions() x2.update_ghost_regions() @@ -380,11 +385,18 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3 ): Y[0] = y1 Y[1] = y2 - exact_dot=x1.dot(y1)+x2.dot(y2) + # Test dot product + exact_dot = x1.dot(y1)+x2.dot(y2) - assert X.dtype == float + assert X.dtype == dtype assert np.allclose(X.dot(Y), exact_dot, rtol=1e-14, atol=1e-14 ) + # Test axpy product + axpy_exact = X + np.pi * cst * Y + X.mul_iadd(np.pi * cst, Y) + assert np.allclose(X[0]._data, axpy_exact[0]._data, rtol=1e-10, atol=1e-10 ) + assert np.allclose(X[1]._data, axpy_exact[1]._data, rtol=1e-10, atol=1e-10 ) + M1 = StencilMatrix(V, V) M2 = StencilMatrix(V, V) M3 = StencilMatrix(V, V) @@ -421,7 +433,7 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3 ): Y[0]=M1.dot(x1)+M2.dot(x2) Y[1]=M3.dot(x1) - assert M.dtype == float + assert M.dtype == dtype assert np.allclose((M.dot(X)).toarray(), Y.toarray(), rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float, complex] ) @@ -1415,6 +1427,13 @@ def test_block_matrix_operator_parallel_dot_backend( dtype, n1, n2, p1, p2, P1, y1 = M1.dot(x1) + M2.dot(x2) y2 = M3.dot(x1) + M4.dot(x2) + #Test axpy method in parallel + z3 = X + 5 * factor * Y + X.mul_iadd(5 * factor, Y) + + # Test exact value and symetry of the scalar product + assert np.allclose(X[0]._data, z3[0]._data) + # Check data in 1D array assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-13, atol=1e-13 ) assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-13, atol=1e-13 ) diff --git a/psydac/linalg/tests/test_stencil_matrix.py b/psydac/linalg/tests/test_stencil_matrix.py index 0fd702796..521bdd587 100644 --- a/psydac/linalg/tests/test_stencil_matrix.py +++ b/psydac/linalg/tests/test_stencil_matrix.py @@ -730,7 +730,7 @@ def test_stencil_matrix_2d_serial_dot_1(dtype, n1, n2, p1, p2, s1, s2, P1, P2): # Check data in 1D array assert y.dtype==dtype - assert np.allclose(ya, ya_exact, rtol=1e-13, atol=1e-13) + assert np.allclose(ya, ya_exact, rtol=1e-12, atol=1e-12) # TODO: verify for s>1 # =============================================================================== @@ -1208,9 +1208,6 @@ def test_stencil_matrix_2d_serial_vdot(dtype, n1, n2, p1, p2, s1, s2, P1, P2): # Exact result using Numpy dot product ya_exact = np.dot(np.conjugate(Ma), xa) - - print(ya) - print(ya_exact) # Check data in 1D array assert y.dtype == dtype assert np.allclose(ya, ya_exact, rtol=1e-13, atol=1e-13) @@ -2179,7 +2176,6 @@ def test_stencil_matrix_1d_parallel_dot(dtype, n1, p1, sh1, P1): ya_exact = Ms.dot(xa) # Check data in 1D array - print(ya-ya_exact) assert np.allclose(ya, ya_exact, rtol=1e-14, atol=1e-14) # =============================================================================== diff --git a/psydac/linalg/tests/test_stencil_vector.py b/psydac/linalg/tests/test_stencil_vector.py index 9bc51e8bf..03da52906 100644 --- a/psydac/linalg/tests/test_stencil_vector.py +++ b/psydac/linalg/tests/test_stencil_vector.py @@ -298,11 +298,21 @@ def test_stencil_vector_2d_serial_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, P2 else: z_exact = np.dot(x.toarray(), y.toarray()) - # Test exact value and symetry of the scalar product + # Compute axpy exact sol + if dtype == complex: + cst = 5j + else: + cst = 5 + + z3 = x + cst * y + x.mul_iadd(cst, y) + + # Test exact value and symmetry of the scalar product assert z1.dtype == dtype assert z2.dtype == dtype assert z1 == z_exact assert z2 == z_exact.conjugate() + assert np.allclose(x._data, z3._data) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -690,6 +700,17 @@ def test_stencil_vector_2d_parallel_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, res_ex1 = comm.allreduce(np.dot(x.toarray(), y.toarray())) res_ex2 = res_ex1 + # Compute axpy exact sol + if dtype == complex: + cst = 5j + else: + cst = 5 + + z3 = x + cst * y + x.mul_iadd(cst, y) + + # Test exact value and symmetry of the scalar product + assert np.allclose(x._data, z3._data) assert res1 == res_ex1 assert res2 == res_ex2 @@ -748,6 +769,18 @@ def test_stencil_vector_3d_parallel_dot(dtype, n1, n2, n3, p1, p2, p3, s1, s2, s res_ex1 = comm.allreduce(np.dot(x.toarray(), y.toarray())) res_ex2 = res_ex1 + # Compute axpy exact sol + if dtype == complex: + cst = 5j + else: + cst = 5 + + z3 = x + cst * y + x.mul_iadd(cst, y) + + # Test exact value and symmetry of the scalar product + assert np.allclose(x._data, z3._data) + assert res1 == res_ex1 assert res2 == res_ex2 diff --git a/psydac/polar/dense.py b/psydac/polar/dense.py index edf7c908b..2e209f95e 100644 --- a/psydac/polar/dense.py +++ b/psydac/polar/dense.py @@ -137,6 +137,10 @@ def zeros(self): data = np.zeros(self.ncoeff, dtype=self.dtype) return DenseVector(self, data) + # ... + def axpy(self, a, x, y): + y += a * x + #------------------------------------- # Other properties/methods #-------------------------------------