diff --git a/Makefile b/Makefile index d3a3f43a..74372aaa 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ docker-mac: prereq: - $(PYTHON) -m pip install $(PIP_FLAGS) $(PIP_INSTALL_FLAGS) Cython cython numpy scipy matplotlib pyyaml h5py pybind11 MeshPy tabulate modepy mpi4py pyamg + $(PYTHON) -m pip install $(PIP_FLAGS) $(PIP_INSTALL_FLAGS) wheel Cython cython numpy scipy matplotlib pyyaml h5py pybind11 MeshPy tabulate modepy mpi4py pyamg meshio $(PYTHON) -m pip install $(PIP_FLAGS) $(PIP_INSTALL_FLAGS) scikit-sparse prereq-extra: diff --git a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi index 0457328f..bdccf952 100644 --- a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi +++ b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi @@ -108,7 +108,7 @@ cdef class {SCALAR_label}LinearOperator: raise NotImplementedError('Cannot multiply {} with {}:\n{}'.format(self, x, e)) def __rmul__(self, x): - if isinstance(x, {SCALAR}): + if isinstance(x, (float, int, {SCALAR})): return {SCALAR_label}Multiply_Linear_Operator(self, x) else: raise NotImplementedError('Cannot multiply with {}'.format(x)) @@ -264,6 +264,9 @@ cdef class {SCALAR_label}LinearOperator: diagonal = property(fget=get_diagonal, fset=set_diagonal) + def getMemorySize(self): + return -1 + cdef class {SCALAR_label}TimeStepperLinearOperator({SCALAR_label}LinearOperator): def __init__(self, @@ -356,6 +359,9 @@ cdef class {SCALAR_label}TimeStepperLinearOperator({SCALAR_label}LinearOperator) else: return super({SCALAR_label}TimeStepperLinearOperator, self).__mul__(x) + def getMemorySize(self): + return self.M.getMemorySize()+self.S.getMemorySize() + cdef class {SCALAR_label}Multiply_Linear_Operator({SCALAR_label}LinearOperator): def __init__(self, @@ -419,6 +425,9 @@ cdef class {SCALAR_label}Multiply_Linear_Operator({SCALAR_label}LinearOperator): def __repr__(self): return '{}*{}'.format(self.factor, self.A) + def getMemorySize(self): + return 
self.A.getMemorySize() + cdef class {SCALAR_label}Product_Linear_Operator({SCALAR_label}LinearOperator): def __init__(self, @@ -495,6 +504,9 @@ cdef class {SCALAR_label}Product_Linear_Operator({SCALAR_label}LinearOperator): def __repr__(self): return '{}*{}'.format(self.A, self.B) + def getMemorySize(self): + return self.A.getMemorySize()+self.B.getMemorySize() + cdef class {SCALAR_label}VectorLinearOperator: def __init__(self, int num_rows, int num_columns, int vectorSize): diff --git a/base/PyNucleus_base/solvers.pyx b/base/PyNucleus_base/solvers.pyx index edaccdc3..2a25acfa 100644 --- a/base/PyNucleus_base/solvers.pyx +++ b/base/PyNucleus_base/solvers.pyx @@ -1101,7 +1101,10 @@ cdef class complex_lu_solver(complex_solver): if A is not None: self.A = A - if isinstance(self.A, (ComplexLinearOperator, HelmholtzShiftOperator)): + if isinstance(self.A, ComplexDense_LinearOperator): + from scipy.linalg import lu_factor + self.lu, self.perm = lu_factor(self.A.data) + elif isinstance(self.A, (ComplexLinearOperator, HelmholtzShiftOperator)): from scipy.sparse.linalg import splu try: if isinstance(self.A, ComplexSSS_LinearOperator): diff --git a/base/PyNucleus_base/utilsFem.py b/base/PyNucleus_base/utilsFem.py index 6b78a1af..f159b4c5 100644 --- a/base/PyNucleus_base/utilsFem.py +++ b/base/PyNucleus_base/utilsFem.py @@ -346,7 +346,7 @@ def loadDictFromHDF5(f): else: params[key] = LinearOperator.HDF5read(f[key]) elif 'vertices' in f[key] and 'cells' in f[key]: - from PyNucleus.fem import meshNd + from PyNucleus.fem.mesh import meshNd params[key] = meshNd.HDF5read(f[key]) else: params[key] = loadDictFromHDF5(f[key]) diff --git a/drivers/testDistOp.py b/drivers/testDistOp.py index e396c523..5a292c0b 100755 --- a/drivers/testDistOp.py +++ b/drivers/testDistOp.py @@ -344,7 +344,8 @@ lcl_dm.inner) cg.maxIter = 1000 u = lcl_dm.zeros() - cg(b, u) + with d.timer('CG solve'): + cg(b, u) residuals = cg.residuals solveGroup = d.addOutputGroup('solve', tested=True, rTol=1e-1) diff 
--git a/fem/PyNucleus_fem/DoFMaps.pyx b/fem/PyNucleus_fem/DoFMaps.pyx index 89da5b26..25de7931 100644 --- a/fem/PyNucleus_fem/DoFMaps.pyx +++ b/fem/PyNucleus_fem/DoFMaps.pyx @@ -798,7 +798,7 @@ cdef class DoFMap: """ try: - from PyNucleus_nl.kernelsCy import RangedFractionalKernel + from PyNucleus_nl.kernelsCy import RangedFractionalKernel, ComplexKernel if isinstance(kernel, RangedFractionalKernel): from PyNucleus_base.linear_operators import multiIntervalInterpolationOperator @@ -831,6 +831,26 @@ cdef class DoFMap: return self.scalarDM.assembleNonlocal(kernel, matrixFormat, dm2.scalarDM, returnNearField, **kwargs) else: return self.scalarDM.assembleNonlocal(kernel, matrixFormat, None, returnNearField, **kwargs) + elif isinstance(kernel, ComplexKernel): + from PyNucleus_nl.nonlocalLaplacian import ComplexnonlocalBuilder + + builder = ComplexnonlocalBuilder(self.mesh, self, kernel, dm2=dm2, **kwargs) + if matrixFormat.upper() == 'DENSE': + return builder.getDense() + elif matrixFormat.upper() == 'DIAGONAL': + return builder.getDiagonal() + elif matrixFormat.upper() == 'SPARSIFIED': + return builder.getDense(trySparsification=True) + elif matrixFormat.upper() == 'SPARSE': + return builder.getSparse(returnNearField=returnNearField) + elif matrixFormat.upper() == 'H2': + return builder.getH2(returnNearField=returnNearField) + elif matrixFormat.upper() == 'H2CORRECTED': + A = builder.getH2FiniteHorizon() + A.setKernel(kernel) + return A + else: + raise NotImplementedError('Unknown matrix format: {}'.format(matrixFormat)) else: from PyNucleus_nl import nonlocalBuilder @@ -1153,31 +1173,57 @@ cdef class DoFMap: return y, dm def augmentWithBoundaryData(self, - const REAL_t[::1] x, - const REAL_t[::1] boundaryData): + x, + boundaryData): "Augment the finite element function with boundary data." 
cdef: DoFMap dm - fe_vector y - REAL_t[::1] yy + fe_vector yReal + REAL_t[::1] xReal, boundaryReal, yyReal + complex_fe_vector yComplex + COMPLEX_t[::1] xComplex, boundaryComplex, yyComplex INDEX_t i, k, dof, dof2, num_cells = self.mesh.num_cells if isinstance(self, Product_DoFMap): dm = Product_DoFMap(type(self.scalarDM)(self.mesh, tag=MAX_INT), self.numComponents) else: dm = type(self)(self.mesh, tag=MAX_INT) - y = dm.empty(dtype=REAL) - yy = y - for i in range(num_cells): - for k in range(self.dofs_per_element): - dof = self.cell2dof(i, k) - dof2 = dm.cell2dof(i, k) - if dof >= 0: - yy[dof2] = x[dof] - else: - yy[dof2] = boundaryData[-dof-1] - return y + if ((isinstance(x, fe_vector) and isinstance(boundaryData, fe_vector)) or + (isinstance(x, np.ndarray) and x.dtype == REAL) and (isinstance(boundaryData, np.ndarray) and boundaryData.dtype == REAL)): + + xReal = x + boundaryReal = boundaryData + yReal = dm.empty(dtype=REAL) + yyReal = yReal + + for i in range(num_cells): + for k in range(self.dofs_per_element): + dof = self.cell2dof(i, k) + dof2 = dm.cell2dof(i, k) + if dof >= 0: + yyReal[dof2] = xReal[dof] + else: + yyReal[dof2] = boundaryReal[-dof-1] + return yReal + elif ((isinstance(x, complex_fe_vector) and isinstance(boundaryData, complex_fe_vector)) or + (isinstance(x, np.ndarray) and x.dtype == COMPLEX) and (isinstance(boundaryData, np.ndarray) and boundaryData.dtype == COMPLEX)): + xComplex = x + boundaryComplex = boundaryData + yComplex = dm.empty(dtype=COMPLEX) + yyComplex = yComplex + + for i in range(num_cells): + for k in range(self.dofs_per_element): + dof = self.cell2dof(i, k) + dof2 = dm.cell2dof(i, k) + if dof >= 0: + yyComplex[dof2] = xComplex[dof] + else: + yyComplex[dof2] = boundaryComplex[-dof-1] + return yComplex + else: + raise NotImplementedError() def getFullDoFMap(self, DoFMap complement_dm): cdef: diff --git a/fem/PyNucleus_fem/factories.py b/fem/PyNucleus_fem/factories.py index 9fe4114a..bafaf259 100644 --- a/fem/PyNucleus_fem/factories.py
+++ b/fem/PyNucleus_fem/factories.py @@ -34,7 +34,7 @@ radialIndicator, fractalDiffusivity, expDiffusivity, componentVectorFunction) -from . lookupFunction import lookupFunction +from . lookupFunction import lookupFunction, vectorLookupFunction rhsFunSin1D = _rhsFunSin1D() @@ -96,6 +96,7 @@ def rhsFractional2D_nonPeriodic(s): functionFactory.register('x1**3', monomial, params={'exponent': np.array([0., 3., 0.])}) functionFactory.register('x2**3', monomial, params={'exponent': np.array([0., 0., 3.])}) functionFactory.register('Lambda', Lambda) +functionFactory.register('complexLambda', complexLambda) functionFactory.register('squareIndicator', squareIndicator) functionFactory.register('radialIndicator', radialIndicator) functionFactory.register('rhsBoundaryLayer2D', _rhsBoundaryLayer2D) @@ -110,6 +111,7 @@ def rhsFractional2D_nonPeriodic(s): functionFactory.register('inclusionsHong', inclusionsHong) functionFactory.register('motorPermeability', motorPermeability) functionFactory.register('lookup', lookupFunction) +functionFactory.register('vectorLookup', vectorLookupFunction) functionFactory.register('shiftScaleFunctor', shiftScaleFunctor) functionFactory.register('componentVectorFunction', componentVectorFunction, aliases=['vector']) @@ -143,6 +145,7 @@ def __call__(self, mesh, *args, **kwargs): circle, graded_circle, cutoutCircle, twinDisc, dumbbell, wrench, Hshape, ball, rectangle, crossSquare, gradedSquare, gradedBox, + squareWithCircularCutout, boxWithBallCutout, disconnectedInterval, disconnectedDomain, double_graded_interval, simpleFicheraCube, uniformSquare, @@ -174,8 +177,11 @@ def __call__(self, mesh, *args, **kwargs): meshFactory.register('graded_circle', graded_circle, 2, aliases=['gradedCircle']) meshFactory.register('discWithInteraction', discWithInteraction, 2) meshFactory.register('cutoutCircle', cutoutCircle, 2, aliases=['cutoutDisc']) +meshFactory.register('squareWithCircularCutout', squareWithCircularCutout, 2) 
+meshFactory.register('boxWithBallCutout', boxWithBallCutout, 3, aliases=['boxMinusBall']) meshFactory.register('simpleBox', simpleBox, 3, aliases=['unitBox', 'cube', 'unitCube']) meshFactory.register('box', box, 3) +meshFactory.register('ball', ball, 3) meshFactory.register('simpleFicheraCube', simpleFicheraCube, 3, aliases=['fichera', 'ficheraCube']) meshFactory.register('standardSimplex2D', standardSimplex2D, 2) meshFactory.register('standardSimplex3D', standardSimplex3D, 3) diff --git a/fem/PyNucleus_fem/femCy.pxd b/fem/PyNucleus_fem/femCy.pxd index 141d8df6..686548b8 100644 --- a/fem/PyNucleus_fem/femCy.pxd +++ b/fem/PyNucleus_fem/femCy.pxd @@ -32,3 +32,34 @@ cdef class multi_function: public INDEX_t numInputs, numOutputs cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y) + + +cdef class simplexComputations: + cdef: + REAL_t[:, ::1] simplex + cdef void setSimplex(self, REAL_t[:, ::1] simplex) + cdef REAL_t evalVolume(self) + cdef REAL_t evalVolumeGradients(self, + REAL_t[:, ::1] gradients) + cdef REAL_t evalVolumeGradientsInnerProducts(self, + REAL_t[:, ::1] gradients, + REAL_t[::1] innerProducts) + cdef REAL_t evalSimplexVolumeGradientsInnerProducts(self, + const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] gradients, + REAL_t[::1] innerProducts) + + +cdef class simplexComputations1D(simplexComputations): + cdef: + REAL_t[:, ::1] temp + + +cdef class simplexComputations2D(simplexComputations): + cdef: + REAL_t[:, ::1] temp + + +cdef class simplexComputations3D(simplexComputations): + cdef: + REAL_t[:, ::1] temp diff --git a/fem/PyNucleus_fem/femCy.pyx b/fem/PyNucleus_fem/femCy.pyx index a0c0dd3a..f91d4c2f 100644 --- a/fem/PyNucleus_fem/femCy.pyx +++ b/fem/PyNucleus_fem/femCy.pyx @@ -38,6 +38,7 @@ from . DoFMaps cimport (P0_DoFMap, P1_DoFMap, P2_DoFMap, P3_DoFMap, from . quadrature cimport simplexQuadratureRule, Gauss1D, Gauss2D, Gauss3D, simplexXiaoGimbutas from . functions cimport function, complexFunction, vectorFunction, matrixFunction from . 
simplexMapper cimport simplexMapper +from scipy.spatial import cKDTree cdef class local_matrix_t: @@ -160,9 +161,6 @@ cdef inline REAL_t simplexVolumeAndProducts1D(const REAL_t[:, ::1] simplex, cdef class simplexComputations: - cdef: - REAL_t[:, ::1] simplex - cdef void setSimplex(self, REAL_t[:, ::1] simplex): self.simplex = simplex @@ -202,11 +200,31 @@ coordinates """ pass + def setSimplex_py(self, REAL_t[:, ::1] simplex): + self.setSimplex(simplex) -cdef class simplexComputations1D(simplexComputations): - cdef: - REAL_t[:, ::1] temp + def evalVolume_py(self): + return self.evalVolume() + + def evalVolumeGradients_py(self): + gradients = np.zeros((self.simplex.shape[0], self.simplex.shape[1]), dtype=REAL) + vol = self.evalVolumeGradients(gradients) + return vol, gradients + + def evalVolumeGradientsInnerProducts_py(self): + gradients = np.zeros((self.simplex.shape[0], self.simplex.shape[1]), dtype=REAL) + innerProducts = np.zeros(((self.simplex.shape[0]*(self.simplex.shape[0]+1))//2), dtype=REAL) + vol = self.evalVolumeGradientsInnerProducts(gradients, innerProducts) + return vol, gradients, innerProducts + + def evalSimplexVolumeGradientsInnerProducts_py(self, REAL_t[:,::1] simplex): + gradients = np.zeros((simplex.shape[0], simplex.shape[1]), dtype=REAL) + innerProducts = np.zeros(((simplex.shape[0]*(simplex.shape[0]+1))//2), dtype=REAL) + vol = self.evalSimplexVolumeGradientsInnerProducts(simplex, gradients, innerProducts) + return vol, gradients, innerProducts + +cdef class simplexComputations1D(simplexComputations): cdef REAL_t evalVolume(self): cdef: REAL_t vol @@ -263,9 +281,6 @@ cdef class simplexComputations1D(simplexComputations): cdef class simplexComputations2D(simplexComputations): - cdef: - REAL_t[:, ::1] temp - def __init__(self): self.temp = uninitialized((2, 2), dtype=REAL) @@ -358,9 +373,6 @@ cdef class simplexComputations2D(simplexComputations): cdef class simplexComputations3D(simplexComputations): - cdef: - REAL_t[:, ::1] temp - def 
__init__(self): self.temp = uninitialized((7, 3), dtype=REAL) @@ -1547,6 +1559,58 @@ def getSurfaceDoFMap(meshBase mesh, return dmS +def getSurfaceToVolumeProlongation(DoFMap dmVolume, + DoFMap dmSurface): + cdef: + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t cellNo, localVertexNo, vertexNo, k, dof_volume, dof_surface, jj + INDEX_t[::1] dofSurface2dofVolume + REAL_t[:, ::1] coords + dofSurface2dofVolume = uninitialized((dmSurface.num_dofs), dtype=INDEX) + # check that we did not eliminate vertices + if dmVolume.mesh.num_vertices == dmSurface.mesh.num_vertices: + assert isinstance(dmVolume, P1_DoFMap) + assert isinstance(dmSurface, P1_DoFMap) + for cellNo in range(dmSurface.mesh.num_cells): + for localVertexNo in range(dmSurface.mesh.cells.shape[1]): + vertexNo = dmSurface.mesh.cells[cellNo, localVertexNo] + dof_surface = vertexNo + dof_volume = vertexNo + dofSurface2dofVolume[dof_surface] = dof_volume + else: + kd = cKDTree(dmVolume.getDoFCoordinates()) + coords = dmSurface.getDoFCoordinates() + for dof_surface in range(dmSurface.num_dofs): + dof_volume = kd.query(coords[dof_surface, :], 1)[1] + dofSurface2dofVolume[dof_surface] = dof_volume + + indptr = np.zeros((dmVolume.num_dofs+1), dtype=INDEX) + indices = uninitialized((dmSurface.num_dofs), dtype=INDEX) + data = np.ones((dmSurface.num_dofs), dtype=REAL) + + for cellNo in range(dmSurface.mesh.num_cells): + for localVertexNo in range(dmSurface.mesh.cells.shape[1]): + for k in range(dmSurface.dofs_per_vertex): + dof_surface = dmSurface.cell2dof(cellNo, localVertexNo*dmSurface.dofs_per_vertex+k) + dof_volume = dofSurface2dofVolume[dof_surface] + if dof_surface >= 0 and dof_volume >= 0: + indptr[dof_volume+1] = 1 + for dof_volume in range(dmVolume.num_dofs): + indptr[dof_volume+1] += indptr[dof_volume] + for cellNo in range(dmSurface.mesh.num_cells): + for localVertexNo in range(dmSurface.mesh.cells.shape[1]): + for k in range(dmSurface.dofs_per_vertex): + dof_surface = dmSurface.cell2dof(cellNo, 
localVertexNo*dmSurface.dofs_per_vertex+k) + dof_volume = dofSurface2dofVolume[dof_surface] + if dof_surface >= 0 and dof_volume >= 0: + jj = indptr[dof_volume] + indices[jj] = dof_surface + P = CSR_LinearOperator(indices, indptr, data) + P.num_columns = dmSurface.num_dofs + return P + + def assembleSurfaceMass(meshBase mesh, meshBase surface, DoFMap volumeDoFMap, @@ -1709,33 +1773,45 @@ def assembleStiffness(DoFMap dm, DoFMap dm2=None): cdef: INDEX_t dim = dm.mesh.dim + INDEX_t manifold_dim = dm.mesh.manifold_dim local_matrix_t local_matrix if diffusivity is None: - if isinstance(dm, P1_DoFMap): - if dim == 1: - local_matrix = stiffness_1d_sym_P1() - elif dim == 2: - local_matrix = stiffness_2d_sym_P1() - elif dim == 3: - local_matrix = stiffness_3d_sym_P1() - else: - raise NotImplementedError() - elif isinstance(dm, P2_DoFMap): - if dim == 1: - local_matrix = stiffness_1d_sym_P2() - elif dim == 2: - local_matrix = stiffness_2d_sym_P2() - elif dim == 3: - local_matrix = stiffness_3d_sym_P2() + if dim == manifold_dim: + if isinstance(dm, P1_DoFMap): + if dim == 1: + local_matrix = stiffness_1d_sym_P1() + elif dim == 2: + local_matrix = stiffness_2d_sym_P1() + elif dim == 3: + local_matrix = stiffness_3d_sym_P1() + else: + raise NotImplementedError() + elif isinstance(dm, P2_DoFMap): + if dim == 1: + local_matrix = stiffness_1d_sym_P2() + elif dim == 2: + local_matrix = stiffness_2d_sym_P2() + elif dim == 3: + local_matrix = stiffness_3d_sym_P2() + else: + raise NotImplementedError() + elif isinstance(dm, P3_DoFMap): + if dim == 1: + local_matrix = stiffness_1d_sym_P3() + elif dim == 2: + local_matrix = stiffness_2d_sym_P3() + elif dim == 3: + local_matrix = stiffness_3d_sym_P3() + else: + raise NotImplementedError() else: raise NotImplementedError() - elif isinstance(dm, P3_DoFMap): - if dim == 1: - local_matrix = stiffness_1d_sym_P3() - elif dim == 2: - local_matrix = stiffness_2d_sym_P3() - elif dim == 3: - local_matrix = stiffness_3d_sym_P3() + elif dim-1 == 
manifold_dim: + if isinstance(dm, P1_DoFMap): + if dim == 2: + local_matrix = stiffness_1d_in_2d_sym_P1() + else: + raise NotImplementedError() else: raise NotImplementedError() else: diff --git a/fem/PyNucleus_fem/functions.pyx b/fem/PyNucleus_fem/functions.pyx index 52a93e49..b6d09c5f 100644 --- a/fem/PyNucleus_fem/functions.pyx +++ b/fem/PyNucleus_fem/functions.pyx @@ -59,6 +59,9 @@ cdef class function: else: return NotImplemented + def __rmul__(self, other): + return self.__mul__(other) + def __neg__(self): if isinstance(self, mulFunction): return mulFunction(self.f, -self.fac) @@ -1838,6 +1841,9 @@ cdef class complexFunction: else: return NotImplemented + def __rmul__(self, other): + return self.__mul__(other) + def __neg__(self): if isinstance(self, complexMulFunction): return complexMulFunction(self.f, -self.fac) diff --git a/fem/PyNucleus_fem/lookupFunction.pxd b/fem/PyNucleus_fem/lookupFunction.pxd index 0c3c9968..d5330054 100644 --- a/fem/PyNucleus_fem/lookupFunction.pxd +++ b/fem/PyNucleus_fem/lookupFunction.pxd @@ -6,7 +6,8 @@ ################################################################################### from PyNucleus_base.myTypes cimport REAL_t, INDEX_t -from . functions cimport function +from . functions cimport function, vectorFunction +from . femCy cimport simplexComputations from . meshCy cimport meshBase, cellFinder2 from . 
DoFMaps cimport DoFMap @@ -17,3 +18,15 @@ cdef class lookupFunction(function): public DoFMap dm public REAL_t[::1] u public cellFinder2 cellFinder + + +cdef class vectorLookupFunction(vectorFunction): + cdef: + meshBase mesh + public DoFMap dm + public REAL_t[::1] u + public cellFinder2 cellFinder + simplexComputations sC + REAL_t[::1] temp + REAL_t[:, ::1] simplex + REAL_t[:, ::1] gradients diff --git a/fem/PyNucleus_fem/lookupFunction.pyx b/fem/PyNucleus_fem/lookupFunction.pyx index 2c8b15d5..dd213a40 100644 --- a/fem/PyNucleus_fem/lookupFunction.pyx +++ b/fem/PyNucleus_fem/lookupFunction.pyx @@ -5,7 +5,10 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from . DoFMaps cimport shapeFunction +from PyNucleus_base.myTypes import REAL +from PyNucleus_base.blas import uninitialized +from . DoFMaps cimport shapeFunction, vectorShapeFunction +from . femCy cimport simplexComputations1D, simplexComputations2D, simplexComputations3D cdef class lookupFunction(function): @@ -33,3 +36,47 @@ cdef class lookupFunction(function): shapeFun = self.dm.localShapeFunctions[k] val += shapeFun.eval(self.cellFinder.bary)*self.u[dof] return val + + +cdef class vectorLookupFunction(vectorFunction): + def __init__(self, meshBase mesh, DoFMap dm, REAL_t[::1] u, cellFinder2 cF=None): + super(vectorLookupFunction, self).__init__(mesh.dim) + self.mesh = mesh + self.dm = dm + self.u = u + if cF is None: + self.cellFinder = cellFinder2(self.mesh) + else: + self.cellFinder = cF + if self.mesh.dim == 1: + self.sC = simplexComputations1D() + elif self.mesh.dim == 2: + self.sC = simplexComputations2D() + elif self.mesh.dim == 3: + self.sC = simplexComputations3D() + else: + raise NotImplementedError() + self.simplex = uninitialized((self.mesh.dim+1, self.mesh.dim), dtype=REAL) + self.sC.setSimplex(self.simplex) + self.temp = uninitialized((self.mesh.dim), dtype=REAL) + 
self.gradients = uninitialized((self.mesh.dim+1, self.mesh.dim), dtype=REAL) + + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] vals): + cdef: + vectorShapeFunction shapeFun + INDEX_t cellNo, dof, k, componentNo + for componentNo in range(self.mesh.dim): + vals[componentNo] = 0. + cellNo = self.cellFinder.findCell(x) + if cellNo == -1: + return + self.mesh.getSimplex(cellNo, self.simplex) + self.sC.evalVolumeGradients(self.gradients) + for k in range(self.dm.dofs_per_element): + dof = self.dm.cell2dof(cellNo, k) + if dof >= 0: + shapeFun = self.dm.localShapeFunctions[k] + shapeFun.setCell(self.mesh.cells[cellNo, :]) + shapeFun.eval(self.cellFinder.bary, self.gradients, self.temp) + for componentNo in range(self.mesh.dim): + vals[componentNo] += self.u[dof]*self.temp[componentNo] diff --git a/fem/PyNucleus_fem/mesh.py b/fem/PyNucleus_fem/mesh.py index 40c22a09..46765db0 100644 --- a/fem/PyNucleus_fem/mesh.py +++ b/fem/PyNucleus_fem/mesh.py @@ -18,6 +18,7 @@ boundaryVertices, boundaryEdges, boundaryFaces, + boundaryFacesWithOrientation, boundaryVerticesFromBoundaryEdges, boundaryEdgesFromBoundaryFaces, radialMeshTransformation) @@ -1018,6 +1019,49 @@ def circleWithInnerRadius(n, radius=2., innerRadius=1., returnFacets=False, **kw return mesh +def squareWithCircularCutout(ax=-3., ay=-3., bx=3., by=3., radius=1., num_points_per_unit_len=2): + from . 
meshConstruction import polygon, circle + square = polygon([(ax, ay), (bx, ay), (bx, by), (ax, by)]) + frame = square+circle((0, 0), radius, num_points_per_unit_len=num_points_per_unit_len) + frame.holes.append((0, 0)) + return frame.mesh() + + +def boxWithBallCutout(ax=-3., ay=-3., az=-3., bx=3., by=3., bz=3., + radius=1., points=4, radial_subdiv=None, **kwargs): + from meshpy.tet import MeshInfo, build # Options + from meshpy.geometry import generate_surface_of_revolution, EXT_OPEN, GeometryBuilder, make_box + + if radial_subdiv is None: + radial_subdiv = 2*points+2 + + dphi = np.pi/points + + def truncate(r): + if abs(r) < 1e-10: + return 0 + else: + return r + + rz = [(truncate(radius*np.sin(i*dphi)), radius*np.cos(i*dphi)) for i in range(points+1)] + + geob = GeometryBuilder() + geob.add_geometry(*generate_surface_of_revolution(rz, + closure=EXT_OPEN, + radial_subdiv=radial_subdiv)) + points, facets, _, facet_markers = make_box((ax, ay, az), (bx, by, bz)) + geob.add_geometry(points, facets, facet_markers=facet_markers) + mesh_info = MeshInfo() + geob.set(mesh_info) + mesh_info.set_holes([(0., 0., 0.)]) + mesh_meshpy = build(mesh_info, **kwargs) # , options=Options(switches='pq1.2/10') + mesh = mesh3d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + from PyNucleus_fem.meshCy import radialMeshTransformer + mesh.setMeshTransformation(radialMeshTransformer(radius)) + return mesh + + def gradedIntervals(intervals, h): intervals = list(sorted(intervals, key=lambda int: int[0])) @@ -1507,7 +1551,7 @@ def get_boundary_faces(self): if self.dim <= 2: self._boundaryFaces = uninitialized((0, 3), dtype=INDEX) elif self.dim == 3: - self._boundaryFaces = boundaryFaces(self.cells) + self._boundaryFaces = boundaryFacesWithOrientation(self.vertices, self.cells) return self._boundaryFaces else: return self._boundaryFaces @@ -1785,11 +1829,11 @@ def HDF5read(node): def exportVTK(self, filename, cell_data=None): import meshio - if self.dim 
== 1: + if self.manifold_dim == 1: cell_type = 'line' - elif self.dim == 2: + elif self.manifold_dim == 2: cell_type = 'triangle' - elif self.dim == 3: + elif self.manifold_dim == 3: cell_type = 'tetra' else: raise NotImplementedError() @@ -1801,9 +1845,9 @@ def exportVTK(self, filename, cell_data=None): cell_data=cell_data), file_format='vtk') - def exportSolutionVTK(self, x, filename, labels='solution', cell_data=None): + def exportSolutionVTK(self, x, filename, labels='solution', cell_data={}): import meshio - from . DoFMaps import Product_DoFMap + from . DoFMaps import Product_DoFMap, P0_DoFMap if not isinstance(x, (list, tuple)): x = [x] labels = [labels] @@ -1811,29 +1855,36 @@ def exportSolutionVTK(self, x, filename, labels='solution', cell_data=None): assert len(x) == len(labels) point_data = {} for xx, label in zip(x, labels): - sol = xx.linearPart() - - if isinstance(xx.dm, Product_DoFMap): - v2d = uninitialized((self.num_vertices, 1), dtype=INDEX) - sol.dm.getVertexDoFs(v2d) - sol2 = np.zeros((self.num_vertices, sol.dm.numComponents), dtype=REAL) - for component in range(sol.dm.numComponents): - R, _ = sol.dm.getRestrictionProlongation(component) - for i in range(self.num_vertices): - sol2[i, component] = (R*sol)[v2d[i, 0]] - point_data[label] = sol2 + if isinstance(xx.dm, P0_DoFMap): + cell_data[label] = [xx.toarray()] else: - v2d = uninitialized((self.num_vertices, 1), dtype=INDEX) - sol.dm.getVertexDoFs(v2d) - sol2 = np.zeros((self.num_vertices), dtype=REAL) - for i in range(self.num_vertices): - sol2[i] = sol[v2d[i, 0]] - point_data[label] = np.array(sol2) - if self.dim == 1: + sol = xx.linearPart() + + if isinstance(xx.dm, Product_DoFMap): + v2d = -np.ones((self.num_vertices, 1), dtype=INDEX) + sol.dm.getVertexDoFs(v2d) + sol2 = np.zeros((self.num_vertices, sol.dm.numComponents), dtype=REAL) + for component in range(sol.dm.numComponents): + R, _ = sol.dm.getRestrictionProlongation(component) + for i in range(self.num_vertices): + dof = v2d[i, 0] + 
if dof >= 0: + sol2[i, component] = (R*sol)[dof] + point_data[label] = sol2 + else: + v2d = -np.ones((self.num_vertices, 1), dtype=INDEX) + sol.dm.getVertexDoFs(v2d) + sol2 = np.zeros((self.num_vertices), dtype=REAL) + for i in range(self.num_vertices): + dof = v2d[i, 0] + if dof >= 0: + sol2[i] = sol[dof] + point_data[label] = np.array(sol2) + if self.manifold_dim == 1: cell_type = 'line' - elif self.dim == 2: + elif self.manifold_dim == 2: cell_type = 'triangle' - elif self.dim == 3: + elif self.manifold_dim == 3: cell_type = 'tetra' else: raise NotImplementedError() @@ -1843,7 +1894,7 @@ def exportSolutionVTK(self, x, filename, labels='solution', cell_data=None): meshio.Mesh(vertices, {cell_type: self.cells_as_array}, point_data=point_data, - cell_data=cell_data,), + cell_data=cell_data), file_format='vtk') @staticmethod @@ -2829,9 +2880,6 @@ def plotInterfaceVTK(self, interface): return ugridActor - def get_surface_mesh(self, tag=None): - return mesh2d(self.vertices, self.getBoundaryFacesByTag(tag)) - def checkDoFMap(self, DoFMap): "Plot the DoF numbers on the mesh." recorderdDofs = {} diff --git a/fem/PyNucleus_fem/meshCy.pyx b/fem/PyNucleus_fem/meshCy.pyx index e9cef443..634f6efc 100644 --- a/fem/PyNucleus_fem/meshCy.pyx +++ b/fem/PyNucleus_fem/meshCy.pyx @@ -32,8 +32,12 @@ cdef class meshTransformer: cdef class radialMeshTransformer(meshTransformer): - def __init__(self): + cdef: + REAL_t radius + + def __init__(self, REAL_t radius=0.): super(radialMeshTransformer, self).__init__() + self.radius = radius def __call__(self, meshBase mesh, dict lookup): cdef: @@ -43,24 +47,46 @@ cdef class radialMeshTransformer(meshTransformer): REAL_t r1, r2, r, r3 INDEX_t dim = mesh.dim REAL_t[:, ::1] vertices = mesh.vertices - for encodeVal in lookup: - decode_edge(encodeVal, e) - vertexNo = lookup[encodeVal] - r1 = 0. - for i in range(dim): - r1 += vertices[e[0], i]**2 - r1 = sqrt(r1) - r2 = 0. 
- for i in range(dim): - r2 += vertices[e[1], i]**2 - r2 = sqrt(r2) - r = 0.5*r1 + 0.5*r2 - r3 = 0. - for i in range(dim): - r3 += vertices[vertexNo, i]**2 - r3 = sqrt(r3) - for i in range(dim): - mesh.vertices[vertexNo, i] *= r/r3 + if self.radius > 0.: + for encodeVal in lookup: + decode_edge(encodeVal, e) + vertexNo = lookup[encodeVal] + r3 = 0. + for i in range(dim): + r3 += vertices[vertexNo, i]**2 + r3 = sqrt(r3) + if r3 < self.radius: + r1 = 0. + for i in range(dim): + r1 += vertices[e[0], i]**2 + r1 = sqrt(r1) + r2 = 0. + for i in range(dim): + r2 += vertices[e[1], i]**2 + r2 = sqrt(r2) + r = 0.5*r1 + 0.5*r2 + + for i in range(dim): + mesh.vertices[vertexNo, i] *= r/r3 + else: + for encodeVal in lookup: + decode_edge(encodeVal, e) + vertexNo = lookup[encodeVal] + r1 = 0. + for i in range(dim): + r1 += vertices[e[0], i]**2 + r1 = sqrt(r1) + r2 = 0. + for i in range(dim): + r2 += vertices[e[1], i]**2 + r2 = sqrt(r2) + r = 0.5*r1 + 0.5*r2 + r3 = 0. + for i in range(dim): + r3 += vertices[vertexNo, i]**2 + r3 = sqrt(r3) + for i in range(dim): + mesh.vertices[vertexNo, i] *= r/r3 cdef class gradedMeshTransformer(meshTransformer): @@ -594,7 +620,7 @@ cdef class meshBase: if hasattr(self, '_boundaryVertices'): numBoundaryVertices = 0 boundaryVertices = self._boundaryVertices - boundaryVertexTags = self._boundaryVertexTags + boundaryVertexTags = self.boundaryVertexTags for i in range(boundaryVertices.shape[0]): v = boundaryVertices[i] j = mapping[v] @@ -1848,6 +1874,71 @@ def boundaryFaces(INDEX_t[:, ::1] cells): return bfaces_mem +def boundaryFacesWithOrientation(REAL_t[:, ::1] vertices, INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_cells = cells.shape[0], i, k, j, cellNo + INDEX_t v0, v1, v2, v3 + INDEX_t[:, ::1] faces = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f012 = faces[0, :] + INDEX_t[::1] f013 = faces[1, :] + INDEX_t[::1] f023 = faces[2, :] + INDEX_t[::1] f123 = faces[3, :] + INDEX_t[::1] f = faces[3, :] + REAL_t[::1] x = uninitialized((3), 
dtype=REAL) + REAL_t[::1] y = uninitialized((3), dtype=REAL) + REAL_t[::1] n1 = uninitialized((3), dtype=REAL) + REAL_t[::1] n2 = uninitialized((3), dtype=REAL) + dict bfaces = {} + np.ndarray[INDEX_t, ndim=2] bfaces_mem + INDEX_t[:, ::1] bfaces_mv + tuple hv + for i in range(num_cells): + v0, v1, v2, v3 = (cells[i, 0], cells[i, 1], + cells[i, 2], cells[i, 3]) + sortFace(v0, v1, v2, f012) + sortFace(v0, v1, v3, f013) + sortFace(v0, v2, v3, f023) + sortFace(v1, v2, v3, f123) + for k in range(4): + hv = encode_face(faces[k, :]) + try: + bfaces.pop(hv) + except KeyError: + bfaces[hv] = i + bfaces_mem = uninitialized((len(bfaces), 3), dtype=INDEX) + bfaces_mv = bfaces_mem + for i, hv in enumerate(bfaces): + decode_face(hv, f) + cellNo = bfaces[hv] + + # compute face normal + for j in range(3): + x[j] = vertices[f[1], j]-vertices[f[0], j] + for j in range(3): + y[j] = vertices[f[2], j]-vertices[f[0], j] + n2[0] = x[1]*y[2]-x[2]*y[1] + n2[1] = x[2]*y[0]-x[0]*y[2] + n2[2] = x[0]*y[1]-x[1]*y[0] + + # compute vector from cell center to face center + for j in range(3): + x[j] = 0. + for k in range(3): + x[j] += vertices[f[k], j] + x[j] /= 3. + y[j] = 0. 
+ for k in range(4): + y[j] += vertices[cells[cellNo, k], j] + y[j] *= 0.25 + n1[j] = x[j]-y[j] + # flip the face if they point in different directions + if mydot(n2, n1) < 0: + f[1], f[2] = f[2], f[1] + for j in range(3): + bfaces_mv[i, j] = f[j] + return bfaces_mem + + def boundaryEdgesFromBoundaryFaces(INDEX_t[:, ::1] bfaces): cdef: INDEX_t nc = bfaces.shape[0] diff --git a/fem/PyNucleus_fem/vector_{SCALAR}.pxi b/fem/PyNucleus_fem/vector_{SCALAR}.pxi index cbdc22c9..a37dddc4 100644 --- a/fem/PyNucleus_fem/vector_{SCALAR}.pxi +++ b/fem/PyNucleus_fem/vector_{SCALAR}.pxi @@ -144,6 +144,9 @@ cdef class {SCALAR_label_lc_}fe_vector: assignScaled(v2.data, v1.data, alpha) return v2 + def __rmul__(self, other): + return self.__mul__(other) + def toarray(self, copy=False): return np.array(self.data, copy=copy) @@ -306,6 +309,12 @@ cdef class {SCALAR_label_lc_}fe_vector: def augmentWithBoundaryData(self, {SCALAR_label_lc_}fe_vector boundaryData): return self.dm.augmentWithBoundaryData(self, boundaryData) + def exportVTK(self, filename, label): + if isinstance(self.dm, P0_DoFMap): + self.dm.mesh.exportSolutionVTK(x=[], filename=filename, labels=[], cell_data={label: [self.toarray()]}) + else: + self.dm.mesh.exportSolutionVTK(x=[self], filename=filename, labels=[label]) + cdef void {SCALAR_label_lc_}assign_2d({SCALAR}_t[:, ::1] y, const {SCALAR}_t[:, ::1] x): cdef: diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx b/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx index 35a93531..b7b0033a 100644 --- a/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx +++ b/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx @@ -28,11 +28,11 @@ def buildRestrictionProlongation(DoFMap coarse_DoFMap, DoFMap fine_DoFMap): if isinstance(coarse_DoFMap, P0_DoFMap): if isinstance(fine_DoFMap, P0_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R =
buildRestriction_1D_P0(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P0(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P0(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() @@ -40,29 +40,29 @@ def buildRestrictionProlongation(DoFMap coarse_DoFMap, raise NotImplementedError() elif isinstance(coarse_DoFMap, P1_DoFMap): if isinstance(fine_DoFMap, P1_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P1(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P1(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P1(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() elif isinstance(fine_DoFMap, P2_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P1_P2(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P1_P2(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P1_P2(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() elif isinstance(fine_DoFMap, P3_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P1_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P1_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P1_P3(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() @@ -70,20 +70,20 @@ def buildRestrictionProlongation(DoFMap coarse_DoFMap, raise 
NotImplementedError() elif isinstance(coarse_DoFMap, P2_DoFMap): if isinstance(fine_DoFMap, P2_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P2(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P2(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P2(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() elif isinstance(fine_DoFMap, P3_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P2_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P2_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P2_P3(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() @@ -91,11 +91,11 @@ def buildRestrictionProlongation(DoFMap coarse_DoFMap, raise NotImplementedError() elif isinstance(coarse_DoFMap, P3_DoFMap): if isinstance(fine_DoFMap, P3_DoFMap): - if coarse_DoFMap.dim == 1: + if coarse_DoFMap.mesh.manifold_dim == 1: R = buildRestriction_1D_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 2: + elif coarse_DoFMap.mesh.manifold_dim == 2: R = buildRestriction_2D_P3(coarse_DoFMap, fine_DoFMap) - elif coarse_DoFMap.dim == 3: + elif coarse_DoFMap.mesh.manifold_dim == 3: R = buildRestriction_3D_P3(coarse_DoFMap, fine_DoFMap) else: raise NotImplementedError() diff --git a/nl/PyNucleus_nl/clusterMethodCy.pyx b/nl/PyNucleus_nl/clusterMethodCy.pyx index 752c34e7..ea68f943 100644 --- a/nl/PyNucleus_nl/clusterMethodCy.pyx +++ b/nl/PyNucleus_nl/clusterMethodCy.pyx @@ -1101,7 +1101,7 @@ cdef class tree_node: BOOL_t assembleOnRoot=True, BOOL_t local=False): cdef: - INDEX_t i, k, I, l, j, p, dim, dof = -1, r, start, end + INDEX_t i, k, I, l, j, p, dim, 
manifold_dim, dof = -1, r, start, end REAL_t[:, ::1] coeff, simplex, local_vals, PHI, xi, x REAL_t[::1] eta, fvals REAL_t vol, beta, omega @@ -1112,6 +1112,7 @@ cdef class tree_node: transferMatrixBuilder tMB REAL_t[:, ::1] transferOperator dim = mesh.dim + manifold_dim = mesh.manifold_dim # Sauter Schwab p. 428 if isinstance(DoFMap, P0_DoFMap): quadOrder = order+1 @@ -1123,7 +1124,7 @@ cdef class tree_node: quadOrder = order+4 else: raise NotImplementedError() - qr = simplexXiaoGimbutas(quadOrder, dim) + qr = simplexXiaoGimbutas(quadOrder, dim, manifold_dim) # get values of basis function in quadrature nodes PHI = uninitialized((DoFMap.dofs_per_element, qr.num_nodes), dtype=REAL) @@ -1132,7 +1133,7 @@ cdef class tree_node: PHI[i, j] = DoFMap.localShapeFunctions[i](qr.nodes[:, j]) coeff = np.zeros((DoFMap.num_dofs, order**dim), dtype=REAL) - simplex = uninitialized((dim+1, dim), dtype=REAL) + simplex = uninitialized((manifold_dim+1, dim), dtype=REAL) local_vals = uninitialized((DoFMap.dofs_per_element, order**dim), dtype=REAL) eta = np.cos((2.0*np.arange(order, 0, -1, dtype=REAL)-1.0) / (2.0*order) * np.pi) @@ -1911,7 +1912,7 @@ cdef class productIterator: return True -def assembleFarFieldInteractions(Kernel kernel, dict Pfar, INDEX_t m, DoFMap dm): +def assembleFarFieldInteractions(Kernel kernel, dict Pfar, INDEX_t m, DoFMap dm, BOOL_t bemMode=False): cdef: INDEX_t lvl REAL_t[:, ::1] box1, box2, x, y @@ -1946,11 +1947,18 @@ def assembleFarFieldInteractions(Kernel kernel, dict Pfar, INDEX_t m, DoFMap dm) y[k, j] = (box2[j, 1]-box2[j, 0])*0.5 * eta_p + box2[j, 0] k += 1 cP.kernelInterpolant = uninitialized((kiSize, kiSize), dtype=REAL) - for i in range(kiSize): - for j in range(kiSize): - if kernel_variable: - kernel.evalParamsPtr(dim, &x[i, 0], &y[j, 0]) - cP.kernelInterpolant[i, j] = -2.0*kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) + if not bemMode: + for i in range(kiSize): + for j in range(kiSize): + if kernel_variable: + kernel.evalParamsPtr(dim, &x[i, 0], &y[j, 
0]) + cP.kernelInterpolant[i, j] = -2.0*kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) + else: + for i in range(kiSize): + for j in range(kiSize): + if kernel_variable: + kernel.evalParamsPtr(dim, &x[i, 0], &y[j, 0]) + cP.kernelInterpolant[i, j] = kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) cdef class H2Matrix(LinearOperator): @@ -2374,6 +2382,9 @@ cdef class DistributedH2Matrix_globalData(LinearOperator): self.comm.Allreduce(MPI.IN_PLACE, d) return d + def getMemorySize(self): + return self.localMat.getMemorySize() + cdef class DistributedLinearOperator(LinearOperator): """ @@ -2407,6 +2418,9 @@ cdef class DistributedLinearOperator(LinearOperator): def __repr__(self): return '' % (self.comm.rank, self.comm.size, self.localMat, self.lcl_dm.num_dofs) + def getMemorySize(self): + return self.localMat.getMemorySize() + cdef void setupNear(self): cdef: nearFieldClusterPair cP @@ -3284,7 +3298,7 @@ cpdef BOOL_t getAdmissibleClusters(Kernel kernel, # diam2, # diamUnion)) return False - elif (refParams.farFieldInteractionSize > n1.get_num_dofs()*n2.get_num_dofs()) and (diamUnion < horizonValue): + elif (refParams.farFieldInteractionSize > (n1.get_num_dofs())*(n2.get_num_dofs())) and (diamUnion < horizonValue): Pnear.append(nearFieldClusterPair(n1, n2)) return False elif n1.get_is_leaf(): diff --git a/nl/PyNucleus_nl/discretizedProblems.py b/nl/PyNucleus_nl/discretizedProblems.py index 47b6467c..2e6bd28a 100644 --- a/nl/PyNucleus_nl/discretizedProblems.py +++ b/nl/PyNucleus_nl/discretizedProblems.py @@ -28,6 +28,7 @@ NEUMANN, HOMOGENEOUS_NEUMANN, transientFractionalProblem) import logging +import warnings class stationaryModelSolution(classWithComputedDependencies): @@ -592,6 +593,7 @@ def report(self, group): group.add('DoFMap', str(self.dm)) group.add('Interior DoFMap', str(self.dmInterior)) group.add('Dirichlet DoFMap', str(self.dmBC)) + group.add('matrix memory size', self.A.getMemorySize()) class discretizedTransientProblem(discretizedNonlocalProblem): @@ -841,3 +843,5 @@ def 
report(self, group): super().report(group) group.add('dt', self.dt) group.add('numTimeSteps', self.numTimeSteps) + + diff --git a/nl/PyNucleus_nl/fractionalLaplacian1D.pxd b/nl/PyNucleus_nl/fractionalLaplacian1D.pxd index 40f65769..47082e7f 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian1D.pxd +++ b/nl/PyNucleus_nl/fractionalLaplacian1D.pxd @@ -5,7 +5,7 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t, BOOL_t from PyNucleus_fem.quadrature cimport (quadratureRule, simplexQuadratureRule, quadQuadratureRule, @@ -28,6 +28,9 @@ cdef class fractionalLaplacian1DZeroExterior(nonlocalLaplacian1D): public REAL_t[:, ::1] PHI_dist, PHI_sep, PHI_vertex dict distantPHI +cdef class singularityCancelationQuadRule1D(quadratureRule): + pass + cdef class fractionalLaplacian1D(nonlocalLaplacian1D): cdef: diff --git a/nl/PyNucleus_nl/fractionalLaplacian1D.pyx b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx index 61a7ed0b..71ac2f1e 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian1D.pyx +++ b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx @@ -12,7 +12,6 @@ cimport numpy as np from PyNucleus_base.myTypes import INDEX, REAL from PyNucleus_base import uninitialized, uninitialized_like from PyNucleus_fem.meshCy cimport meshBase -# from . 
nonlocalLaplacianBase import ALL from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, transformQuadratureRule, doubleSimplexQuadratureRule, @@ -36,7 +35,7 @@ cdef class fractionalLaplacian1DZeroExterior(nonlocalLaplacian1D): self.symmetricCells = False -class singularityCancelationQuadRule1D(quadratureRule): +cdef class singularityCancelationQuadRule1D(quadratureRule): def __init__(self, panelType panel, REAL_t singularity, INDEX_t quad_order_diagonal, INDEX_t quad_order_regular): cdef: INDEX_t i @@ -145,7 +144,7 @@ class singularityCancelationQuadRule1D(quadratureRule): super(singularityCancelationQuadRule1D, self).__init__(bary, weights, 2*dim) -class singularityCancelationQuadRule1D_boundary(quadratureRule): +cdef class singularityCancelationQuadRule1D_boundary(quadratureRule): def __init__(self, panelType panel, REAL_t singularity, INDEX_t quad_order_diagonal, INDEX_t quad_order_regular): cdef: INDEX_t i @@ -580,8 +579,8 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): **kwargs): super(fractionalLaplacian1D_boundary, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - smin = max(0.5*(-self.kernel.min_singularity-1.), 0.) - smax = max(0.5*(-self.kernel.max_singularity-1.), 0.) + smin = max(0.5*(-self.kernel.min_singularity), 0.) + smax = max(0.5*(-self.kernel.max_singularity), 0.) 
if target_order is None: # this is the desired local quadrature error target_order = self.DoFMap.polynomialOrder+1-smin @@ -638,7 +637,7 @@ cdef class fractionalLaplacian1D_boundary(fractionalLaplacian1DZeroExterior): sQR = self.specialQuadRules[(singularityValue, panel)] except KeyError: - if singularityValue > -1.: + if singularityValue > -1.+1e-3: qr = singularityCancelationQuadRule1D_boundary(panel, singularityValue, self.quad_order_diagonal, 1) else: qr = singularityCancelationQuadRule1D_boundary(panel, 2.+singularityValue, self.quad_order_diagonal, 1) diff --git a/nl/PyNucleus_nl/fractionalLaplacian2D.pxd b/nl/PyNucleus_nl/fractionalLaplacian2D.pxd index f6c96019..fe3b81d4 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian2D.pxd +++ b/nl/PyNucleus_nl/fractionalLaplacian2D.pxd @@ -5,7 +5,7 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t, BOOL_t from PyNucleus_fem.quadrature cimport (quadratureRule, simplexQuadratureRule, quadQuadratureRule) @@ -27,6 +27,10 @@ cdef class fractionalLaplacian2DZeroExterior(nonlocalLaplacian2D): public REAL_t[:, ::1] PHI_edge2, PHI_vertex2 +cdef class singularityCancelationQuadRule2D(quadratureRule): + pass + + cdef class fractionalLaplacian2D(nonlocalLaplacian2D): cdef: public quadratureRule qrEdge, qrVertex, qrId diff --git a/nl/PyNucleus_nl/fractionalLaplacian2D.pyx b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx index f47ca733..93c5065f 100644 --- a/nl/PyNucleus_nl/fractionalLaplacian2D.pyx +++ b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx @@ -36,7 +36,7 @@ cdef class fractionalLaplacian2DZeroExterior(nonlocalLaplacian2D): self.symmetricCells = False -class singularityCancelationQuadRule2D(quadratureRule): +cdef class singularityCancelationQuadRule2D(quadratureRule): def 
__init__(self, panelType panel, REAL_t singularity, INDEX_t quad_order_diagonal, @@ -402,7 +402,7 @@ class singularityCancelationQuadRule2D(quadratureRule): super(singularityCancelationQuadRule2D, self).__init__(bary, weights, 2*dim+1) -class singularityCancelationQuadRule2D_boundary(quadratureRule): +cdef class singularityCancelationQuadRule2D_boundary(quadratureRule): def __init__(self, panelType panel, REAL_t singularity, INDEX_t quad_order_diagonal, @@ -1146,7 +1146,7 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): **kwargs): super(fractionalLaplacian2D_boundary, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) - smax = max(0.5*(-self.kernel.max_singularity-2.), 0.) + smax = max(0.5*(-self.kernel.max_singularity-1.), 0.) if target_order is None: # this is the desired global order wrt to the number of DoFs # target_order = (2.-s)/self.dim @@ -1171,7 +1171,7 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): REAL_t logdh1 = max(log(d/h1), 0.), logdh2 = max(log(d/h2), 0.) REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) REAL_t loghminH0 = max(logh1H0, logh2H0) - REAL_t s = max(0.5*(-self.kernel.getSingularityValue()-2.), 0.) + REAL_t s = max(0.5*(-self.kernel.getSingularityValue()-1.), 0.) 
REAL_t h panel = max(ceil(((0.5*self.target_order+0.25)*log(self.num_dofs*self.H0**2) + loghminH0 + (s-1.)*logh2H0 - s*logdh2) / (max(logdh1, 0) + 0.35)), @@ -1206,7 +1206,7 @@ cdef class fractionalLaplacian2D_boundary(fractionalLaplacian2DZeroExterior): try: sQR = self.specialQuadRules[(singularityValue, panel)] except KeyError: - if singularityValue > -2.: + if singularityValue > -2.+1e-3: qr = singularityCancelationQuadRule2D_boundary(panel, singularityValue, self.quad_order_diagonal, self.quad_order_diagonal) else: qr = singularityCancelationQuadRule2D_boundary(panel, 2.+singularityValue, self.quad_order_diagonal, self.quad_order_diagonal) diff --git a/nl/PyNucleus_nl/fractionalOrders.pxd b/nl/PyNucleus_nl/fractionalOrders.pxd index 5d940750..043278ff 100644 --- a/nl/PyNucleus_nl/fractionalOrders.pxd +++ b/nl/PyNucleus_nl/fractionalOrders.pxd @@ -6,7 +6,7 @@ ################################################################################### cimport numpy as np -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t from PyNucleus_fem.functions cimport function from . 
twoPointFunctions cimport (twoPointFunction, constantTwoPoint, diff --git a/nl/PyNucleus_nl/fractionalOrders.pyx b/nl/PyNucleus_nl/fractionalOrders.pyx index 54bce368..b4a1f626 100644 --- a/nl/PyNucleus_nl/fractionalOrders.pyx +++ b/nl/PyNucleus_nl/fractionalOrders.pyx @@ -671,7 +671,7 @@ cdef class smoothedInnerOuterFractionalOrder(singleVariableUnsymmetricFractional cdef class feFractionalOrder(singleVariableUnsymmetricFractionalOrder): cdef: - fe_vector vec + public fe_vector vec def __init__(self, fe_vector vec, REAL_t smin, REAL_t smax): self.vec = vec diff --git a/nl/PyNucleus_nl/helpers.py b/nl/PyNucleus_nl/helpers.py index 7338409d..eec5b0da 100644 --- a/nl/PyNucleus_nl/helpers.py +++ b/nl/PyNucleus_nl/helpers.py @@ -663,3 +663,5 @@ def construct(self): d.update(self.params) A = getFracLapl(self.mesh, self.dm, self.kernel, **d) return A + + diff --git a/nl/PyNucleus_nl/interactionDomains.pxd b/nl/PyNucleus_nl/interactionDomains.pxd index d443349c..109df479 100644 --- a/nl/PyNucleus_nl/interactionDomains.pxd +++ b/nl/PyNucleus_nl/interactionDomains.pxd @@ -6,7 +6,7 @@ ################################################################################### -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t from . twoPointFunctions cimport parametrizedTwoPointFunction cdef enum RELATIVE_POSITION_t: diff --git a/nl/PyNucleus_nl/kernelNormalization.pxd b/nl/PyNucleus_nl/kernelNormalization.pxd index 3a3b124f..0dc6d720 100644 --- a/nl/PyNucleus_nl/kernelNormalization.pxd +++ b/nl/PyNucleus_nl/kernelNormalization.pxd @@ -6,7 +6,7 @@ ################################################################################### cimport numpy as np -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t from PyNucleus_fem.functions cimport function from . 
twoPointFunctions cimport (twoPointFunction, constantTwoPoint, @@ -84,3 +84,5 @@ cdef class constantIntegrableScaling(constantTwoPoint): INDEX_t dim REAL_t horizon interactionDomain interaction + + diff --git a/nl/PyNucleus_nl/kernelNormalization.pyx b/nl/PyNucleus_nl/kernelNormalization.pyx index d54c678f..9dfdc973 100644 --- a/nl/PyNucleus_nl/kernelNormalization.pyx +++ b/nl/PyNucleus_nl/kernelNormalization.pyx @@ -416,3 +416,5 @@ cdef class variableFractionalLaplacianScalingWithDifferentHorizon(variableFracti def __setstate__(self, state): variableFractionalLaplacianScalingWithDifferentHorizon.__init__(self, state[0], state[1], state[2], state[3], state[4]) + + diff --git a/nl/PyNucleus_nl/kernel_params.pxi b/nl/PyNucleus_nl/kernel_params.pxi index 144d22f2..f5b4abb9 100644 --- a/nl/PyNucleus_nl/kernel_params.pxi +++ b/nl/PyNucleus_nl/kernel_params.pxi @@ -9,7 +9,7 @@ cdef enum: OFFSET = sizeof(void*) cdef enum: - NUM_KERNEL_PARAMS = 11 + NUM_KERNEL_PARAMS = 12 cdef enum kernelParams: fS = 0*OFFSET @@ -24,6 +24,7 @@ cdef enum kernelParams: fINTERACTION = 9*OFFSET fEXPONENTINVERSE = 10*OFFSET fTEMPERED=10*OFFSET + fGREENS_LAMBDA=10*OFFSET cdef inline BOOL_t isNull(void *c_params, size_t pos): @@ -41,6 +42,12 @@ cdef inline REAL_t getREAL(void *c_params, size_t pos): cdef inline void setREAL(void *c_params, size_t pos, REAL_t val): ((c_params+pos))[0] = val +cdef inline COMPLEX_t getCOMPLEX(void *c_params, size_t pos): + return ((c_params+pos))[0] + +cdef inline void setCOMPLEX(void *c_params, size_t pos, COMPLEX_t val): + ((c_params+pos))[0] = val + ctypedef REAL_t (*fun_t)(REAL_t *x, REAL_t *y, void *c_params) cdef inline void setFun(void *c_params, size_t pos, fun_t val): @@ -49,6 +56,14 @@ cdef inline void setFun(void *c_params, size_t pos, fun_t val): cdef inline fun_t getFun(void *c_params, size_t pos): return ((c_params+pos))[0] +ctypedef COMPLEX_t (*complex_fun_t)(REAL_t *x, REAL_t *y, void *c_params) + +cdef inline void setComplexFun(void *c_params, 
size_t pos, complex_fun_t val): + ((c_params+pos))[0] = val + +cdef inline complex_fun_t getComplexFun(void *c_params, size_t pos): + return ((c_params+pos))[0] + cdef inline REAL_t* getREALArray1D(void *c_params, size_t pos): return ((c_params+pos))[0] @@ -67,3 +82,7 @@ cpdef enum: INDICATOR = 1 PERIDYNAMIC = 2 GAUSSIAN = 3 + LOGINVERSEDISTANCE = 4 + MONOMIAL = 5 + GREENS_2D = 6 + GREENS_3D = 7 diff --git a/nl/PyNucleus_nl/kernels.py b/nl/PyNucleus_nl/kernels.py index 2bf053c2..3e498b67 100644 --- a/nl/PyNucleus_nl/kernels.py +++ b/nl/PyNucleus_nl/kernels.py @@ -18,15 +18,22 @@ from . fractionalOrders import (fractionalOrderBase, constFractionalOrder, variableConstFractionalOrder, - singleVariableTwoPointFunction) + singleVariableUnsymmetricFractionalOrder) from . kernelNormalization import (constantFractionalLaplacianScaling, constantFractionalLaplacianScalingDerivative, variableFractionalLaplacianScaling, - constantIntegrableScaling) + constantIntegrableScaling, + ) from . kernelsCy import (Kernel, + ComplexKernel, FractionalKernel, RangedFractionalKernel, FRACTIONAL, + PERIDYNAMIC, + LOGINVERSEDISTANCE, + GREENS_2D, + GREENS_3D, + MONOMIAL, getKernelEnum) from . operatorInterpolation import admissibleSet import warnings @@ -133,7 +140,7 @@ def getFractionalKernel(dim, scaling = constantFractionalLaplacianScalingDerivative(dim, sFun.value, horizonFun.value, normalized, boundary, derivative, tempered) else: symmetric = sFun.symmetric and isinstance(horizonFun, constant) - if piecewise and isinstance(sFun, singleVariableTwoPointFunction): + if piecewise and isinstance(sFun, singleVariableUnsymmetricFractionalOrder): warnings.warn('Variable s kernels cannot be piecewise. 
Switching to piecewise == False.') piecewise = False scaling = variableFractionalLaplacianScaling(symmetric, normalized, boundary, derivative) @@ -159,7 +166,8 @@ def getIntegrableKernel(dim, normalized=True, piecewise=True, phi=None, - boundary=False): + boundary=False, + monomialPower=np.nan): dim_ = _getDim(dim) kType = _getKernelType(kernel) horizonFun = _getHorizon(horizon) @@ -173,7 +181,7 @@ def getIntegrableKernel(dim, raise NotImplementedError() else: scaling = constantTwoPoint(0.5) - return Kernel(dim_, kType=kType, horizon=horizonFun, interaction=interaction, scaling=scaling, phi=phi, piecewise=piecewise, boundary=boundary) + return Kernel(dim_, kType=kType, horizon=horizonFun, interaction=interaction, scaling=scaling, phi=phi, piecewise=piecewise, boundary=boundary, monomialPower=monomialPower) def getKernel(dim, diff --git a/nl/PyNucleus_nl/kernelsCy.pxd b/nl/PyNucleus_nl/kernelsCy.pxd index e3ac4a1c..87c22025 100644 --- a/nl/PyNucleus_nl/kernelsCy.pxd +++ b/nl/PyNucleus_nl/kernelsCy.pxd @@ -5,9 +5,9 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t from PyNucleus_fem.functions cimport function -from . twoPointFunctions cimport twoPointFunction, constantTwoPoint, parametrizedTwoPointFunction +from . twoPointFunctions cimport twoPointFunction, ComplextwoPointFunction, constantTwoPoint, parametrizedTwoPointFunction from . interactionDomains cimport interactionDomain from . 
fractionalOrders cimport fractionalOrderBase @@ -15,6 +15,7 @@ include "kernel_params_decl.pxi" ctypedef REAL_t (*kernel_fun_t)(REAL_t *x, REAL_t *y, void* user_data) +ctypedef COMPLEX_t (*complex_kernel_fun_t)(REAL_t *x, REAL_t *y, void* user_data) cdef class Kernel(twoPointFunction): @@ -45,6 +46,7 @@ cdef class Kernel(twoPointFunction): cdef REAL_t getHorizonValue2(self) cdef REAL_t getScalingValue(self) cdef void setScalingValue(self, REAL_t scaling) + cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2) cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y) cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y) @@ -53,6 +55,43 @@ cdef class Kernel(twoPointFunction): cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, REAL_t* vec) +cdef class ComplexKernel(ComplextwoPointFunction): + cdef: + public INDEX_t dim + public kernelType kernelType + public REAL_t min_singularity + public REAL_t max_singularity + public function horizon + public interactionDomain interaction + public twoPointFunction scaling + public twoPointFunction phi + public BOOL_t variableSingularity + public BOOL_t variableHorizon + public BOOL_t finiteHorizon + public BOOL_t complement + public BOOL_t variableScaling + public BOOL_t variable + public BOOL_t piecewise + public BOOL_t boundary + public INDEX_t vectorSize + complex_kernel_fun_t kernelFun + void *c_kernel_params + cdef REAL_t getSingularityValue(self) + cdef void setSingularityValue(self, REAL_t singularity) + cdef REAL_t getHorizonValue(self) + cdef void setHorizonValue(self, REAL_t horizon) + cdef REAL_t getHorizonValue2(self) + cdef REAL_t getScalingValue(self) + cdef void setScalingValue(self, REAL_t scaling) + cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2) + cdef void 
evalParams(self, REAL_t[::1] x, REAL_t[::1] y) + cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) + cdef COMPLEX_t eval(self, REAL_t[::1] x, REAL_t[::1] y) + cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec) + cdef COMPLEX_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) + cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, COMPLEX_t* vec) + + cdef class FractionalKernel(Kernel): cdef: public fractionalOrderBase s diff --git a/nl/PyNucleus_nl/kernelsCy.pyx b/nl/PyNucleus_nl/kernelsCy.pyx index 684ef30a..36e1252d 100644 --- a/nl/PyNucleus_nl/kernelsCy.pyx +++ b/nl/PyNucleus_nl/kernelsCy.pyx @@ -10,7 +10,8 @@ from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan, atan2, log, ceil, fabs as abs, M_PI as pi, pow, exp) -from scipy.special.cython_special cimport gammaincc, gamma +from PyNucleus_base.blas cimport mydot +from scipy.special.cython_special cimport gammaincc, gamma, hankel1 import numpy as np cimport numpy as np from PyNucleus_base.myTypes import REAL @@ -34,6 +35,10 @@ cdef inline REAL_t gammainc(REAL_t a, REAL_t x): return gamma(a)*gammaincc(a, x) +cdef inline COMPLEX_t hankel10complex(REAL_t x): + return 1j*hankel1(0., x) + + include "kernel_params.pxi" @@ -46,6 +51,12 @@ def getKernelEnum(str kernelTypeString): return PERIDYNAMIC elif kernelTypeString.upper() == "GAUSSIAN": return GAUSSIAN + elif kernelTypeString.upper() == "LOGINVERSEDISTANCE": + return LOGINVERSEDISTANCE + elif kernelTypeString.upper() == "MONOMIAL": + return MONOMIAL + elif kernelTypeString.upper() == "GREENS_2D": + return GREENS_2D else: raise NotImplementedError(kernelTypeString) @@ -312,6 +323,19 @@ cdef REAL_t peridynamicKernel2D(REAL_t *x, REAL_t *y, void *c_params): return 0. 
+cdef REAL_t peridynamicKernel3D(REAL_t *x, REAL_t *y, void *c_params): + cdef: + interactionDomain interaction = (((c_params+fINTERACTION))[0]) + REAL_t C + REAL_t d2 + if interaction.evalPtr(3, x, y) != 0.: + d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) + C = getREAL(c_params, fSCALING) + return C/sqrt(d2) + else: + return 0. + + cdef REAL_t peridynamicKernel1Dboundary(REAL_t *x, REAL_t *y, void *c_params): cdef: interactionDomain interaction = (((c_params+fINTERACTION))[0]) @@ -391,6 +415,46 @@ cdef REAL_t gaussianKernel2Dboundary(REAL_t *x, REAL_t *y, void *c_params): return 0. +cdef REAL_t logInverseDistance2D(REAL_t *x, REAL_t *y, void *c_params): + cdef: + REAL_t C = getREAL(c_params, fSCALING) + REAL_t d2 + d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + C = getREAL(c_params, fSCALING) + return -0.5*C*log(d2) + + +cdef COMPLEX_t greens2Dcomplex(REAL_t *x, REAL_t *y, void *c_params): + cdef: + REAL_t C = getREAL(c_params, fSCALING) + REAL_t lam = getREAL(c_params, fGREENS_LAMBDA) + REAL_t d2 + d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + C = getREAL(c_params, fSCALING) + return C*hankel10complex(lam*sqrt(d2)) + + +cdef COMPLEX_t greens3Dcomplex(REAL_t *x, REAL_t *y, void *c_params): + cdef: + REAL_t C = getREAL(c_params, fSCALING) + COMPLEX_t lam = getCOMPLEX(c_params, fGREENS_LAMBDA) + REAL_t d2 + d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) + C = getREAL(c_params, fSCALING) + d2 = sqrt(d2) + return C*exp(-lam.real*d2)*(cos(-lam.imag*d2)+1j*sin(-lam.imag*d2))/d2 + + +cdef REAL_t monomial3D(REAL_t *x, REAL_t *y, void *c_params): + cdef: + REAL_t C = getREAL(c_params, fSCALING) + REAL_t singularityValue = getREAL(c_params, fSINGULARITY) + REAL_t d2 + d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1]) + (x[2]-y[2])*(x[2]-y[2]) + C = getREAL(c_params, fSCALING) + return C*pow(d2, 0.5*singularityValue) + + cdef REAL_t updateAndEvalIntegrable(REAL_t *x, REAL_t *y, 
void *c_params): cdef: INDEX_t dim = getINDEX(c_params, fKDIM) @@ -414,6 +478,31 @@ cdef REAL_t updateAndEvalIntegrable(REAL_t *x, REAL_t *y, void *c_params): return kernel(x, y, c_params) +cdef COMPLEX_t updateAndEvalIntegrableComplex(REAL_t *x, REAL_t *y, void *c_params): + cdef: + INDEX_t dim = getINDEX(c_params, fKDIM) + REAL_t[::1] xA + function horizonFun + twoPointFunction scalingFun + REAL_t horizon, C + complex_fun_t kernel = getComplexFun(c_params, fEVAL) + BOOL_t horizonFunNull = isNull(c_params, fHORIZONFUN) + BOOL_t scalingFunNull = isNull(c_params, fSCALINGFUN) + if not horizonFunNull or not scalingFunNull: + xA = x + if not horizonFunNull: + horizonFun = (((c_params+fHORIZONFUN))[0]) + horizon = horizonFun.eval(xA) + setREAL(c_params, fHORIZON2, horizon*horizon) + if not scalingFunNull: + scalingFun = (((c_params+fSCALINGFUN))[0]) + C = scalingFun.evalPtr(dim, x, y) + setREAL(c_params, fSCALING, C) + return kernel(x, y, c_params) + + + + cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params): cdef: INDEX_t dim = getINDEX(c_params, fKDIM) @@ -443,7 +532,7 @@ cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params): cdef class Kernel(twoPointFunction): """A kernel functions that can be used to define a nonlocal operator.""" - def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t vectorSize=1): + def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t vectorSize=1, **kwargs): cdef: parametrizedTwoPointFunction parametrizedScaling int i @@ -474,6 +563,15 @@ cdef class Kernel(twoPointFunction): self.min_singularity = 0. self.max_singularity = 0. self.singularityValue = 0. 
+ elif self.kernelType == LOGINVERSEDISTANCE: + self.min_singularity = 0. + self.max_singularity = 0. + self.singularityValue = 0. + elif self.kernelType == MONOMIAL: + monomialPower = kwargs.get('monomialPower', np.nan) + self.min_singularity = monomialPower + self.max_singularity = monomialPower + self.singularityValue = monomialPower self.horizon = horizon self.variableHorizon = not isinstance(self.horizon, constant) @@ -524,8 +622,13 @@ cdef class Kernel(twoPointFunction): self.kernelFun = peridynamicKernel2D elif self.kernelType == GAUSSIAN: self.kernelFun = gaussianKernel2D + elif self.kernelType == LOGINVERSEDISTANCE: + self.kernelFun = logInverseDistance2D elif dim == 3: - pass + if self.kernelType == PERIDYNAMIC: + self.kernelFun = peridynamicKernel3D + elif self.kernelType == MONOMIAL: + self.kernelFun = monomial3D else: raise NotImplementedError() else: @@ -565,8 +668,13 @@ cdef class Kernel(twoPointFunction): setFun(self.c_kernel_params, fEVAL, peridynamicKernel2D) elif self.kernelType == GAUSSIAN: setFun(self.c_kernel_params, fEVAL, gaussianKernel2D) + elif self.kernelType == LOGINVERSEDISTANCE: + setFun(self.c_kernel_params, fEVAL, logInverseDistance2D) elif dim == 3: - pass + if self.kernelType == PERIDYNAMIC: + setFun(self.c_kernel_params, fEVAL, peridynamicKernel3D) + elif self.kernelType == MONOMIAL: + setFun(self.c_kernel_params, fEVAL, monomial3D) else: raise NotImplementedError() else: @@ -645,6 +753,11 @@ cdef class Kernel(twoPointFunction): cdef void setScalingValue(self, REAL_t scaling): setREAL(self.c_kernel_params, fSCALING, scaling) + cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + # Set the horizon. 
+ if self.variableHorizon: + self.horizonValue = self.horizon.eval(center1) + cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y): if self.piecewise: if self.variableHorizon: @@ -735,15 +848,347 @@ cdef class Kernel(twoPointFunction): kernelName = 'peridynamic' elif self.kernelType == GAUSSIAN: kernelName = 'Gaussian' + elif self.kernelType == LOGINVERSEDISTANCE: + kernelName = 'logInverseDistance' + elif self.kernelType == MONOMIAL: + kernelName = 'monomial' + else: + raise NotImplementedError() + return "{}({}{}, {}, {})".format(self.__class__.__name__, kernelName, '' if not self.boundary else '-boundary', repr(self.interaction), self.scaling) + + def __getstate__(self): + return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise, self.boundary, self.singularityValue) + + def __setstate__(self, state): + Kernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state[8]) + + def plot(self, x0=None): + "Plot the kernel function." + from matplotlib import ticker + import matplotlib.pyplot as plt + if x0 is None: + x0 = np.zeros((self.dim), dtype=REAL) + self.evalParams(x0, x0) + if self.finiteHorizon: + delta = self.horizonValue + else: + delta = 2. 
+ x = np.linspace(-1.1*delta, 1.1*delta, 201) + if self.dim == 1: + vals = np.zeros_like(x) + for i in range(x.shape[0]): + y = x0+np.array([x[i]], dtype=REAL) + if np.linalg.norm(x0-y) > 1e-9 or self.singularityValue >= 0: + vals[i] = self(x0, y) + else: + vals[i] = np.nan + plt.plot(x, vals) + plt.yscale('log') + if not self.finiteHorizon: + plt.xlim([x[0], x[x.shape[0]-1]]) + if self.singularityValue < 0: + plt.ylim(top=np.nanmax(vals)) + plt.xlabel('$x-y$') + elif self.dim == 2: + X, Y = np.meshgrid(x, x) + Z = np.zeros_like(X) + for i in range(x.shape[0]): + for j in range(x.shape[0]): + y = x0+np.array([x[i], x[j]], dtype=REAL) + if np.linalg.norm(x0-y) > 1e-9 or self.singularityValue >= 0: + Z[i,j] = self(x0, y) + else: + Z[i,j] = np.nan + levels = np.logspace(np.log10(Z[np.absolute(Z)>0].min()), + np.log10(Z[np.absolute(Z)>0].max()), 10) + if levels[0] < levels[levels.shape[0]-1]: + plt.contourf(X, Y, Z, locator=ticker.LogLocator(), + levels=levels) + else: + plt.contourf(X, Y, Z) + plt.axis('equal') + plt.colorbar() + plt.xlabel('$x_1-y_1$') + plt.ylabel('$x_2-y_2$') + + def getBoundaryKernel(self): + "Get the boundary kernel. This is the kernel that corresponds to the elimination of a subdomain via Gauss theorem." + cdef: + Kernel newKernel + from copy import deepcopy + + scaling = deepcopy(self.scaling) + if self.phi is not None: + phi = deepcopy(self.phi) + else: + phi = None + + from . 
kernels import getIntegrableKernel + newKernel = getIntegrableKernel(kernel=self.kernelType, + dim=self.dim, + horizon=deepcopy(self.horizon), + interaction=None, + scaling=scaling, + phi=phi, + piecewise=self.piecewise, + boundary=True) + setREAL(newKernel.c_kernel_params, fEXPONENTINVERSE, getREAL(self.c_kernel_params, fEXPONENTINVERSE)) + return newKernel + + +cdef class ComplexKernel(ComplextwoPointFunction): + """A kernel functions that can be used to define a nonlocal operator.""" + + def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True, BOOL_t boundary=False, INDEX_t vectorSize=1, **kwargs): + cdef: + parametrizedTwoPointFunction parametrizedScaling + int i + + self.dim = dim + self.vectorSize = vectorSize + self.kernelType = kType + self.piecewise = piecewise + self.boundary = boundary + + self.c_kernel_params = malloc(NUM_KERNEL_PARAMS*OFFSET) + for i in range(NUM_KERNEL_PARAMS): + ((self.c_kernel_params+i*OFFSET))[0] = NULL + setINDEX(self.c_kernel_params, fKDIM, dim) + + symmetric = isinstance(horizon, constant) and scaling.symmetric + super(ComplexKernel, self).__init__(symmetric) + + if self.kernelType == GREENS_2D: + greensLambda = kwargs.get('greens2D_lambda', np.nan) + setREAL(self.c_kernel_params, fGREENS_LAMBDA, -greensLambda.imag) + self.min_singularity = 0. + self.max_singularity = 0. + self.singularityValue = 0. + elif self.kernelType == GREENS_3D: + greensLambda = kwargs.get('greens3D_lambda', np.nan) + setCOMPLEX(self.c_kernel_params, fGREENS_LAMBDA, greensLambda) + self.min_singularity = -1. + self.max_singularity = -1. + self.singularityValue = -1. 
+ + self.horizon = horizon + self.variableHorizon = not isinstance(self.horizon, constant) + if self.variableHorizon: + self.horizonValue2 = np.nan + self.finiteHorizon = True + ((self.c_kernel_params+fHORIZONFUN))[0] = horizon + else: + self.horizonValue = self.horizon.value + self.finiteHorizon = self.horizon.value != np.inf + if self.kernelType == GAUSSIAN: + setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) + + self.interaction = interaction + self.complement = self.interaction.complement + ((self.c_kernel_params+fINTERACTION))[0] = self.interaction + self.interaction.setParams(self.c_kernel_params) + + self.phi = phi + if phi is not None: + scaling = phi*scaling + self.scaling = scaling + self.variableScaling = not isinstance(self.scaling, (constantFractionalLaplacianScaling, constantTwoPoint)) + if self.variableScaling: + if isinstance(self.scaling, parametrizedTwoPointFunction): + parametrizedScaling = self.scaling + parametrizedScaling.setParams(self.c_kernel_params) + self.scalingValue = np.nan + ((self.c_kernel_params+fSCALINGFUN))[0] = self.scaling + else: + self.scalingValue = self.scaling.value + + self.variable = self.variableHorizon or self.variableScaling + + if self.piecewise: + if not self.boundary: + if dim == 2: + if self.kernelType == GREENS_2D: + self.kernelFun = greens2Dcomplex + elif dim == 3: + if self.kernelType == GREENS_3D: + self.kernelFun = greens3Dcomplex + else: + raise NotImplementedError() + else: + raise NotImplementedError() + else: + self.kernelFun = updateAndEvalIntegrableComplex + + if not self.boundary: + if dim == 2: + if self.kernelType == GREENS_2D: + setComplexFun(self.c_kernel_params, fEVAL, greens2Dcomplex) + elif dim == 2: + if self.kernelType == GREENS_3D: + setComplexFun(self.c_kernel_params, fEVAL, greens3Dcomplex) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + @property + def singularityValue(self): + "The order of the singularity." 
+ return getREAL(self.c_kernel_params, fSINGULARITY) + + @singularityValue.setter + def singularityValue(self, REAL_t singularity): + setREAL(self.c_kernel_params, fSINGULARITY, singularity) + + cdef REAL_t getSingularityValue(self): + return getREAL(self.c_kernel_params, fSINGULARITY) + + cdef void setSingularityValue(self, REAL_t singularity): + setREAL(self.c_kernel_params, fSINGULARITY, singularity) + + @property + def horizonValue(self): + "The value of the interaction horizon." + return sqrt(getREAL(self.c_kernel_params, fHORIZON2)) + + @horizonValue.setter + def horizonValue(self, REAL_t horizon): + setREAL(self.c_kernel_params, fHORIZON2, horizon**2) + + cdef REAL_t getHorizonValue(self): + return sqrt(getREAL(self.c_kernel_params, fHORIZON2)) + + cdef void setHorizonValue(self, REAL_t horizon): + setREAL(self.c_kernel_params, fHORIZON2, horizon**2) + + @property + def horizonValue2(self): + return getREAL(self.c_kernel_params, fHORIZON2) + + cdef REAL_t getHorizonValue2(self): + return getREAL(self.c_kernel_params, fHORIZON2) + + @horizonValue2.setter + def horizonValue2(self, REAL_t horizon2): + setREAL(self.c_kernel_params, fHORIZON2, horizon2) + + @property + def scalingValue(self): + "The value of the scaling factor." + return getREAL(self.c_kernel_params, fSCALING) + + @scalingValue.setter + def scalingValue(self, REAL_t scaling): + setREAL(self.c_kernel_params, fSCALING, scaling) + + cdef REAL_t getScalingValue(self): + return getREAL(self.c_kernel_params, fSCALING) + + cdef void setScalingValue(self, REAL_t scaling): + setREAL(self.c_kernel_params, fSCALING, scaling) + + cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + # Set the horizon. 
+ if self.variableHorizon: + self.horizonValue = self.horizon.eval(center1) + + cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y): + if self.piecewise: + if self.variableHorizon: + self.horizonValue = self.horizon.eval(x) + if self.kernelType == GAUSSIAN: + setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) + if self.variableScaling: + self.scalingValue = self.scaling.eval(x, y) + + def evalParams_py(self, REAL_t[::1] x, REAL_t[::1] y): + "Evaluate the kernel parameters." + if self.piecewise: + self.evalParams(x, y) + else: + if self.variableHorizon: + self.horizonValue = self.horizon.eval(x) + if self.kernelType == GAUSSIAN: + setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) + if self.variableScaling: + self.scalingValue = self.scaling.eval(x, y) + + cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t[::1] xA + if self.piecewise: + if self.variableHorizon: + xA = x + self.horizonValue = self.horizon.eval(xA) + if self.kernelType == GAUSSIAN: + setREAL(self.c_kernel_params, fEXPONENTINVERSE, 1.0/(self.horizonValue/3.)**2) + if self.variableScaling: + self.scalingValue = self.scaling.evalPtr(dim, x, y) + + cdef COMPLEX_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.kernelFun(&x[0], &y[0], self.c_kernel_params) + + cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec): + vec[0] = self.kernelFun(&x[0], &y[0], self.c_kernel_params) + + cdef COMPLEX_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return self.kernelFun(x, y, self.c_kernel_params) + + cdef void evalVectorPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y, INDEX_t vectorSize, COMPLEX_t* vec): + vec[0] = self.kernelFun(x, y, self.c_kernel_params) + + def __call__(self, REAL_t[::1] x, REAL_t[::1] y, BOOL_t callEvalParams=True): + "Evaluate the kernel." 
+ if self.piecewise and callEvalParams: + self.evalParams(x, y) + return self.kernelFun(&x[0], &y[0], self.c_kernel_params) + + def evalVector_py(self, REAL_t[::1] x, REAL_t[::1] y, COMPLEX_t[::1] vec, BOOL_t callEvalParams=True): + "Evaluate the kernel." + if self.piecewise and callEvalParams: + self.evalParams(x, y) + self.evalVector(x, y, vec) + + def getModifiedKernel(self, + function horizon=None, + twoPointFunction scaling=None): + cdef: + Kernel newKernel + if horizon is None: + horizon = self.horizon + interaction = self.interaction + else: + if scaling is None and isinstance(self.scaling, variableFractionalLaplacianScaling): + scaling = self.scaling.getScalingWithDifferentHorizon() + interaction = type(self.interaction)() + if scaling is None: + scaling = self.scaling + from . kernels import getKernel + newKernel = getKernel(dim=self.dim, kernel=self.kernelType, horizon=horizon, interaction=interaction, scaling=scaling, piecewise=self.piecewise) + setREAL(newKernel.c_kernel_params, fEXPONENTINVERSE, getREAL(self.c_kernel_params, fEXPONENTINVERSE)) + return newKernel + + def getComplementKernel(self): + "Get the complement kernel." + raise NotImplementedError() + from . 
kernels import getKernel + newKernel = getKernel(dim=self.dim, kernel=self.kernelType, horizon=self.horizon, interaction=self.interaction.getComplement(), scaling=self.scaling, piecewise=self.piecewise) + return newKernel + + def __repr__(self): + if self.kernelType == GREENS_2D: + kernelName = 'greens2D' + if self.kernelType == GREENS_3D: + kernelName = 'greens3D' else: raise NotImplementedError() return "{}({}{}, {}, {})".format(self.__class__.__name__, kernelName, '' if not self.boundary else '-boundary', repr(self.interaction), self.scaling) def __getstate__(self): - return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise, self.boundary) + return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise, self.boundary, self.singularityValue) def __setstate__(self, state): - Kernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]) + Kernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state[8]) def plot(self, x0=None): "Plot the kernel function." @@ -1024,6 +1469,28 @@ cdef class FractionalKernel(Kernel): cdef void settemperedValue(self, REAL_t tempered): setREAL(self.c_kernel_params, fTEMPERED, tempered) + cdef void evalParamsOnSimplices(self, REAL_t[::1] center1, REAL_t[::1] center2, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + # Set the max singularity and the horizon. + cdef: + REAL_t sValue + if self.variableOrder: + if self.s.symmetric: + sValue = self.s.eval(center1, center2) + else: + sValue = 0. 
+ sValue = max(sValue, self.s.eval(center1, center2)) + sValue = max(sValue, self.s.eval(center2, center1)) + for i in range(simplex1.shape[0]): + sValue = max(sValue, self.s.eval(simplex1[i,:], center2)) + for i in range(simplex2.shape[0]): + sValue = max(sValue, self.s.eval(simplex2[i,:], center1)) + if not self.boundary: + self.setSingularityValue(-self.dim-2*sValue) + else: + self.setSingularityValue(1-self.dim-2*sValue) + if self.variableHorizon: + self.horizonValue = self.horizon.eval(center1) + cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y): cdef: REAL_t sValue, scalingValue @@ -1040,16 +1507,24 @@ cdef class FractionalKernel(Kernel): if self.variableScaling: scalingValue = self.scaling.eval(x, y) self.setScalingValue(scalingValue) - else: + + cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t[::1] xA + REAL_t sValue, scalingValue + if self.piecewise: if self.variableOrder: - sValue = self.s.eval(x, y) - self.setsValue(sValue) + sValue = self.s.evalPtr(dim, x, y) if not self.boundary: self.setSingularityValue(-self.dim-2*sValue) else: self.setSingularityValue(1-self.dim-2*sValue) + self.setsValue(sValue) + if self.variableHorizon: + xA = x + self.horizonValue = self.horizon.eval(xA) if self.variableScaling: - scalingValue = self.scaling.eval(x, y) + scalingValue = self.scaling.evalPtr(dim, x, y) self.setScalingValue(scalingValue) def evalParams_py(self, REAL_t[::1] x, REAL_t[::1] y): @@ -1071,22 +1546,6 @@ cdef class FractionalKernel(Kernel): scalingValue = self.scaling.eval(x, y) self.setScalingValue(scalingValue) - cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - cdef: - REAL_t[::1] xA - REAL_t sValue, scalingValue - if self.piecewise: - if self.variableOrder: - sValue = self.s.evalPtr(dim, x, y) - self.setsValue(sValue) - self.setSingularityValue(-self.dim-2*sValue) - if self.variableHorizon: - xA = x - self.horizonValue = self.horizon.eval(xA) - if self.variableScaling: - scalingValue = 
self.scaling.evalPtr(dim, x, y) - self.setScalingValue(scalingValue) - cdef void evalVector(self, REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] vec): cdef: INDEX_t i diff --git a/nl/PyNucleus_nl/nonlocalLaplacian.pxd b/nl/PyNucleus_nl/nonlocalLaplacian.pxd index 11cd8aab..25ef59fc 100644 --- a/nl/PyNucleus_nl/nonlocalLaplacian.pxd +++ b/nl/PyNucleus_nl/nonlocalLaplacian.pxd @@ -5,7 +5,7 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t, BOOL_t from PyNucleus_base.tupleDict cimport indexSet, indexSetIterator, arrayIndexSet, unsortedArrayIndexSet, arrayIndexSetIterator from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadQuadratureRule, doubleSimplexQuadratureRule, GaussJacobi, @@ -19,6 +19,7 @@ from . clusterMethodCy cimport (tree_node, DistributedH2Matrix_localData, DistributedLinearOperator) from . nonlocalLaplacianBase cimport (double_local_matrix_t, + Complexdouble_local_matrix_t, nonlocalLaplacian, panelType, MASK_t) @@ -36,36 +37,18 @@ mpi4py.rc.initialize = False from mpi4py import MPI from mpi4py cimport MPI from PyNucleus_base.performanceLogger cimport PLogger, FakePLogger, LoggingPLogger -from PyNucleus_base.linear_operators cimport LinearOperator +from PyNucleus_base.linear_operators cimport LinearOperator, ComplexLinearOperator from PyNucleus_fem.meshCy cimport meshBase from PyNucleus_fem.DoFMaps cimport DoFMap from . 
kernelsCy cimport (Kernel, + ComplexKernel, FractionalKernel) include "config.pxi" - -cdef class nonlocalBuilder: - cdef: - meshBase mesh - public DoFMap dm - public DoFMap dm2 - public Kernel kernel - public double_local_matrix_t local_matrix - public double_local_matrix_t local_matrix_zeroExterior - public double_local_matrix_t local_matrix_surface - BOOL_t zeroExterior - REAL_t[::1] contrib, contribZeroExterior - list _d2c - public MPI.Comm comm - public FakePLogger PLogger - public dict params - cdef inline double_local_matrix_t getLocalMatrix(self, dict params) - cdef inline double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon) - cpdef REAL_t getEntry(self, INDEX_t I, INDEX_t J) - cpdef REAL_t getEntryCluster(self, INDEX_t I, INDEX_t J) - cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=*, LinearOperator Anear=*, dict jumps=*, str prefix=*, tree_node myRoot=*, BOOL_t doDistributedAssembly=*) +include "nonlocalLaplacian_decl_REAL.pxi" +include "nonlocalLaplacian_decl_COMPLEX.pxi" cdef class nearFieldClusterPair: diff --git a/nl/PyNucleus_nl/nonlocalLaplacian.pyx b/nl/PyNucleus_nl/nonlocalLaplacian.pyx index edbfc570..7bed2e97 100644 --- a/nl/PyNucleus_nl/nonlocalLaplacian.pyx +++ b/nl/PyNucleus_nl/nonlocalLaplacian.pyx @@ -13,7 +13,7 @@ include "config.pxi" from libc.math cimport sin, cos, M_PI as pi from libcpp.map cimport map -from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE, BOOL from PyNucleus_base import uninitialized from PyNucleus_base.intTuple cimport intTuple from PyNucleus_base.ip_norm cimport (ip_distributed_nonoverlapping, @@ -30,12 +30,17 @@ from PyNucleus_base.linear_operators cimport (CSR_LinearOperator, SSS_LinearOperator, Dense_LinearOperator, VectorLinearOperator, + ComplexVectorLinearOperator, Dense_VectorLinearOperator, Dense_SubBlock_LinearOperator, diagonalOperator, TimeStepperLinearOperator, 
nullOperator, - sparseGraph) + sparseGraph, + ComplexCSR_LinearOperator, + ComplexSSS_LinearOperator, + ComplexDense_LinearOperator, + ComplexdiagonalOperator) from PyNucleus_fem.splitting import dofmapSplitter from PyNucleus_fem import dofmapFactory # from . nonlocalLaplacianBase import MASK @@ -73,643 +78,8 @@ cdef REAL_t INTERFACE_DOF = np.inf LOGGER = logging.getLogger(__name__) -cdef class IndexManager: - cdef: - DoFMap dm - indexSet myDofs - public INDEX_t[::1] localDoFs - INDEX_t[::1] permutedDoFsLocal - INDEX_t[:, ::1] idxCellFlip - LinearOperator A - sparsityPattern sP - public dict cache - intTuple hv - - def __init__(self, DoFMap dm, LinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None): - cdef: - INDEX_t[:, ::1] idxCellFlip - INDEX_t j, offset - self.dm = dm - self.myDofs = myDofs - self.localDoFs = uninitialized((2*self.dm.dofs_per_element), dtype=INDEX) - self.permutedDoFsLocal = uninitialized((2*self.dm.dofs_per_element), dtype=INDEX) - self.hv = intTuple.create(uninitialized(cellPairIdentifierSize, dtype=INDEX)) - self.A = A - self.sP = sP - if self.dm.mesh.dim == 1: - idxCellFlip = uninitialized((2, self.dm.dofs_per_element), dtype=INDEX) - for j in range(self.dm.dofs_per_vertex): - idxCellFlip[0, j] = j - idxCellFlip[0, self.dm.dofs_per_vertex+j] = self.dm.dofs_per_vertex+j - - idxCellFlip[1, j] = self.dm.dofs_per_vertex+j - idxCellFlip[1, self.dm.dofs_per_vertex+j] = j - offset = 2*self.dm.dofs_per_vertex - for j in range(self.dm.dofs_per_cell): - idxCellFlip[0, offset+j] = offset+j - idxCellFlip[1, offset+self.dm.dofs_per_cell-1-j] = offset+j - - elif self.dm.mesh.dim == 2: - idxCellFlip = uninitialized((3, self.dm.dofs_per_element), dtype=INDEX) - for j in range(self.dm.dofs_per_vertex): - idxCellFlip[0, 0*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[0, 1*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[0, 2*self.dm.dofs_per_vertex+j] = 
2*self.dm.dofs_per_vertex+j - - idxCellFlip[1, 0*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[1, 1*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[1, 2*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - - idxCellFlip[2, 0*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[2, 1*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[2, 2*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - elif self.dm.mesh.dim == 3: - idxCellFlip = uninitialized((12, self.dm.dofs_per_element), dtype=INDEX) - for j in range(self.dm.dofs_per_vertex): - idxCellFlip[0, 0*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[0, 1*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[0, 2*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[0, 3*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - - idxCellFlip[1, 0*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[1, 1*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[1, 2*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[1, 3*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - - idxCellFlip[2, 0*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[2, 1*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[2, 2*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[2, 3*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - - idxCellFlip[3, 0*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[3, 1*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[3, 2*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[3, 3*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - - idxCellFlip[4, 0*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[4, 1*self.dm.dofs_per_vertex+j] = 
0*self.dm.dofs_per_vertex+j - idxCellFlip[4, 2*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[4, 3*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - - idxCellFlip[5, 0*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[5, 1*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[5, 2*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[5, 3*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - - idxCellFlip[6, 0*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[6, 1*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[6, 2*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[6, 3*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - - idxCellFlip[7, 0*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[7, 1*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[7, 2*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[7, 3*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - - idxCellFlip[8, 0*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[8, 1*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[8, 2*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[8, 3*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - - idxCellFlip[9, 0*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[9, 1*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[9, 2*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[9, 3*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - - idxCellFlip[10, 0*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - idxCellFlip[10, 1*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[10, 2*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[10, 3*self.dm.dofs_per_vertex+j] = 
1*self.dm.dofs_per_vertex+j - - idxCellFlip[11, 0*self.dm.dofs_per_vertex+j] = 3*self.dm.dofs_per_vertex+j - idxCellFlip[11, 1*self.dm.dofs_per_vertex+j] = 0*self.dm.dofs_per_vertex+j - idxCellFlip[11, 2*self.dm.dofs_per_vertex+j] = 1*self.dm.dofs_per_vertex+j - idxCellFlip[11, 3*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j - - else: - raise NotImplementedError() - self.idxCellFlip = idxCellFlip - self.cache = {} - - cdef inline void getDoFsElem(self, INDEX_t cellNo): - cdef: - INDEX_t p, dof - for p in range(self.dm.dofs_per_element): - self.localDoFs[p] = self.dm.cell2dof(cellNo, p) - if self.myDofs is not None: - for p in range(self.dm.dofs_per_element): - dof = self.localDoFs[p] - if not self.myDofs.inSet(dof): - self.localDoFs[p] = -1 - - cdef inline BOOL_t getDoFsElemElem(self, INDEX_t cellNo1, INDEX_t cellNo2): - cdef: - INDEX_t p, dof - BOOL_t canSkip = True - for p in range(self.dm.dofs_per_element): - dof = self.dm.cell2dof(cellNo1, p) - self.localDoFs[p] = dof - canSkip = canSkip and dof < 0 - for p in range(self.dm.dofs_per_element): - dof = self.dm.cell2dof(cellNo2, p) - self.localDoFs[self.dm.dofs_per_element+p] = dof - canSkip = canSkip and dof < 0 - return canSkip - - cdef inline void addToMatrixElemSym(self, const REAL_t[::1] contrib, REAL_t fac): - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - self.A.addToEntry(I, I, fac*contrib[k]) - k += 1 - for q in range(p+1, self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) - k += 1 - else: - k += self.dm.dofs_per_element-p - - cdef inline void addToMatrixElem(self, const REAL_t[::1] contrib, REAL_t fac): - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - for q in range(self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.A.addToEntry(I, 
J, fac*contrib[k]) - k += 1 - else: - k += self.dm.dofs_per_element - - cdef inline void addToSparsityElemElemSym(self): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - self.sP.add(I, I) - k += 1 - for q in range(p+1, 2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.sP.add(I, J) - self.sP.add(J, I) - k += 1 - else: - k += 2*self.dm.dofs_per_element-p - - cdef inline void addToMatrixElemElemSym(self, const REAL_t[::1] contrib, REAL_t fac): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - self.A.addToEntry(I, I, fac*contrib[k]) - k += 1 - for q in range(p+1, 2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) - k += 1 - else: - k += 2*self.dm.dofs_per_element-p - - cdef inline void addToSparsityElemElem(self): - # Add general 'contrib' to elements i and j - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - for q in range(2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.sP.add(I, J) - k += 1 - else: - k += 2*self.dm.dofs_per_element - - cdef inline void addToMatrixElemElem(self, const REAL_t[::1] contrib, REAL_t fac): - # Add general 'contrib' to elements i and j - cdef: - INDEX_t k, p, q, I, J - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - for q in range(2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.A.addToEntry(I, J, fac*contrib[k]) - k += 1 - else: - k += 2*self.dm.dofs_per_element - - def buildMasksForClusters_py(self, list clusterList, bint useSymmetricCells): - cdef: - INDEX_t startCluster 
= 0 - return self.buildMasksForClusters(clusterList, useSymmetricCells, &startCluster) - - cdef tupleDictMASK buildMasksForClusters(self, list clusterList, bint useSymmetricCells, INDEX_t *startCluster): - cdef: - nearFieldClusterPair cluster = clusterList[0] - MASK_t cellMask1, cellMask2 - indexSet cellsUnion = cluster.cellsUnion - indexSetIterator it = cellsUnion.getIter(), it2 = cellsUnion.getIter() - indexSet clusterDofs1, clusterDofs2 - INDEX_t cellNo1 = -1, cellNo2 = -1 - INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) - INDEX_t[::1] cellPair2 = uninitialized((2), dtype=INDEX) - tupleDictMASK masks = tupleDictMASK(self.dm.mesh.num_cells, deleteHits=False, logicalAndHits=True, length_inc=20) - INDEX_t p, I - # dict cellMasks1, cellMasks2 - MASK_t mask, mask1, mask2, cellMask11, cellMask12, cellMask21, cellMask22, k - INDEX_t dofs_per_element = self.dm.dofs_per_element - map[INDEX_t, MASK_t] cellMasks1 - map[INDEX_t, MASK_t] cellMasks2 - - cellMask1.reset() - cellMask2.reset() - for cluster in clusterList[startCluster[0]:]: - startCluster[0] += 1 - cellsUnion = cluster.cellsUnion - # cellMasks1 = {} - # cellMasks2 = {} - clusterDofs1 = cluster.n1.get_dofs() - clusterDofs2 = cluster.n2.get_dofs() - - it.setIndexSet(cellsUnion) - - while it.step(): - cellNo1 = it.i - mask1.reset() - mask2.reset() - k = 1 - for p in range(dofs_per_element): - I = self.dm.cell2dof(cellNo1, p) - if I >= 0: - if clusterDofs1.inSet(I): - mask1 |= k - if clusterDofs2.inSet(I): - mask2 |= k - k = k << 1 - cellMasks1[cellNo1] = mask1 - cellMasks2[cellNo1] = mask2 - - if not useSymmetricCells: - # TODO: Think some more about this branch, maybe this can be improved. 
- it.reset() - it2.setIndexSet(cellsUnion) - # it.setIndexSet(cluster.n1.cells) - # it2.setIndexSet(cluster.n2.cells) - while it.step(): - cellNo1 = it.i - cellPair[0] = cellNo1 - cellMask11 = cellMasks1[cellNo1] - cellMask12 = cellMasks2[cellNo1] - it2.reset() - while it2.step(): - cellNo2 = it2.i - cellMask21 = cellMasks1[cellNo2] - cellMask22 = cellMasks2[cellNo2] - cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) - cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) - if (cellMask1.none()) or (cellMask2.none()): - continue - cellPair[1] = cellNo2 - mask = self.getElemElemMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry - masks.enterValue(cellPair, mask) - else: - it.setIndexSet(cluster.n1.cells) - it2.setIndexSet(cluster.n2.cells) - while it.step(): - cellNo1 = it.i - cellPair[0] = cellNo1 - cellPair2[1] = cellNo1 - cellMask11 = cellMasks1[cellNo1] - cellMask12 = cellMasks2[cellNo1] - it2.reset() - while it2.step(): - cellNo2 = it2.i - cellMask21 = cellMasks1[cellNo2] - cellMask22 = cellMasks2[cellNo2] - if cellNo1 > cellNo2: - cellMask1 = cellMask21 | (cellMask11 << dofs_per_element) - cellMask2 = cellMask22 | (cellMask12 << dofs_per_element) - if (cellMask1.none()) or (cellMask2.none()): - continue - cellPair2[0] = cellNo2 - mask = self.getElemElemSymMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry - masks.enterValue(cellPair2, mask) - else: - cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) - cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) - if (cellMask1.none()) or (cellMask2.none()): - continue - cellPair[1] = cellNo2 - mask = self.getElemElemSymMask(cellMask1, cellMask2) - # does a logical "and" if there already is an entry - masks.enterValue(cellPair, mask) - - if masks.nnz > 10000000: - break - - return masks - - # cdef inline MASK_t getElemSymEntryMask(self, INDEX_t cellNo1, INDEX_t I, INDEX_t J): - # # Add symmetric 'contrib' to elements i and j in 
symmetric fashion - # cdef: - # INDEX_t p, q, K, L - # MASK_t k = 1 - # MASK_t mask = 0 - # for p in range(self.dm.dofs_per_element): - # K = self.dm.cell2dof(cellNo1, p) - # for q in range(p, self.dm.dofs_per_element): - # L = self.dm.cell2dof(cellNo1, q) - # if (I == K and J == L) or (J == K and I == L): - # mask |= k - # k = k << 1 - # return mask - - cdef inline MASK_t getElemElemSymMask(self, MASK_t mask_dofs1, MASK_t mask_dofs2): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t p, q - MASK_t k = 1 - MASK_t mask - mask.reset() - for p in range(2*self.dm.dofs_per_element): - if mask_dofs1[p]: - for q in range(p, 2*self.dm.dofs_per_element): - if mask_dofs2[q]: - mask |= k - k = k << 1 - else: - k = k << (2*self.dm.dofs_per_element-p) - return mask - - cdef inline MASK_t getElemElemMask(self, MASK_t mask_dofs1, MASK_t mask_dofs2): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t p, q - MASK_t k = 1 - MASK_t mask - mask.reset() - for p in range(2*self.dm.dofs_per_element): - if mask_dofs1[p]: - for q in range(2*self.dm.dofs_per_element): - if mask_dofs2[q]: - mask |= k - k = k << 1 - else: - k = k << (2*self.dm.dofs_per_element) - return mask - - cdef inline MASK_t getElemSymMask(self): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t p, q - MASK_t k = 1 - MASK_t mask - mask.reset() - for p in range(self.dm.dofs_per_element): - if self.localDoFs[p] >= 0: - for q in range(p, self.dm.dofs_per_element): - if self.localDoFs[q] >= 0: - mask |= k - k = k << 1 - else: - k = k << (self.dm.dofs_per_element-p) - return mask - - cdef inline MASK_t getElemElemSymEntryMask(self, INDEX_t cellNo1, INDEX_t cellNo2, INDEX_t I, INDEX_t J): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t p, q, K, L - MASK_t k = 1 - MASK_t mask - mask.reset() - for p in range(2*self.dm.dofs_per_element): - if p < self.dm.dofs_per_element: - K = 
self.dm.cell2dof(cellNo1, p) - else: - K = self.dm.cell2dof(cellNo2, p-self.dm.dofs_per_element) - - for q in range(p, 2*self.dm.dofs_per_element): - if q < self.dm.dofs_per_element: - L = self.dm.cell2dof(cellNo1, q) - else: - L = self.dm.cell2dof(cellNo2, q-self.dm.dofs_per_element) - if (I == K and J == L) or (J == K and I == L): - mask |= k - k = k << 1 - return mask - - cdef inline void addToMatrixElemElemSymMasked(self, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q, I, J - MASK_t one = 1 - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if mask[k]: - self.A.addToEntry(I, I, fac*contrib[k]) - k += 1 - for q in range(p+1, 2*self.dm.dofs_per_element): - if mask[k]: - J = self.localDoFs[q] - self.A.addToEntry(I, J, fac*contrib[k]) - self.A.addToEntry(J, I, fac*contrib[k]) - k += 1 - - cdef inline void addToMatrixElemElemMasked(self, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): - # Add unsymmetric 'contrib' to elements i and j in unsymmetric fashion - cdef: - INDEX_t k, p, q, I, J - MASK_t one = 1 - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - for q in range(2*self.dm.dofs_per_element): - if mask[k]: - J = self.localDoFs[q] - self.A.addToEntry(I, J, fac*contrib[k]) - k += 1 - - cdef void addToCache(self, REAL_t[::1] contrib, INDEX_t[::1] ID, INDEX_t perm, BOOL_t inv=False): - cdef: - intTuple hv = intTuple.create(ID) - contribNew = uninitialized((contrib.shape[0]), dtype=REAL) - self.permute(contrib, contribNew, perm, inv) - self.cache[hv] = contribNew - - cdef void permute(self, REAL_t[::1] contrib, REAL_t[::1] contribNew, INDEX_t perm, BOOL_t inv=False): - cdef: - INDEX_t K, p, q - INDEX_t k, i, j - INDEX_t dofs_per_element = self.dm.dofs_per_element - INDEX_t dofs_per_element2 = 2*dofs_per_element - BOOL_t perm0 = perm & 1 - INDEX_t perm1 = (perm >> 1) & 3 - INDEX_t perm2 = (perm >> 3) & 3 - 
INDEX_t[::1] permutedDoFsLocal = self.permutedDoFsLocal - if inv and self.dm.dim == 2: - if perm1 == 1: - perm1 = 2 - elif perm1 == 2: - perm1 = 1 - - if perm2 == 1: - perm2 = 2 - elif perm2 == 2: - perm2 = 1 - if perm0: - perm1, perm2 = perm2, perm1 - - for p in range(dofs_per_element2): - if perm0: - i = p+dofs_per_element - if i >= dofs_per_element2: - i -= dofs_per_element2 - else: - i = p - if (i < dofs_per_element): - i = self.idxCellFlip[perm1, i] - else: - i = dofs_per_element + self.idxCellFlip[perm2, i-dofs_per_element] - permutedDoFsLocal[p] = i - - K = 0 - for p in range(dofs_per_element2): - i = permutedDoFsLocal[p] - - k = 2*dofs_per_element*i-(i*(i+1) >> 1) + i - contribNew[K] = contrib[k] - K += 1 - - for q in range(p+1, dofs_per_element2): - j = permutedDoFsLocal[q] - - if i > j: - k = dofs_per_element2*j-(j*(j+1) >> 1) + i - else: - k = dofs_per_element2*i-(i*(i+1) >> 1) + j - contribNew[K] = contrib[k] - K += 1 - - def __repr__(self): - s = '' - s += 'Cache size: {}'.format(len(self.cache)) - return s - - -cdef class IndexManagerVector(IndexManager): - cdef: - VectorLinearOperator vecA - INDEX_t vectorSize - - def __init__(self, DoFMap dm, VectorLinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None): - super(IndexManagerVector, self).__init__(dm, None, cellPairIdentifierSize, myDofs, sP) - self.vecA = A - self.vectorSize = A.vectorSize - - cdef inline void addToMatrixElemSymVector(self, REAL_t[:, ::1] contrib, REAL_t fac): - cdef: - INDEX_t k, p, q, I, J - for p in range(contrib.shape[0]): - for q in range(self.vectorSize): - contrib[p, q] *= fac - k = 0 - for p in range(self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - self.vecA.addToEntry(I, I, contrib[k, :]) - k += 1 - for q in range(p+1, self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.vecA.addToEntry(I, J, contrib[k, :]) - self.vecA.addToEntry(J, I, contrib[k, :]) - k += 1 - else: - k += self.dm.dofs_per_element-p 
- - cdef inline void addToMatrixElemVector(self, REAL_t[:, ::1] contrib, REAL_t fac): - cdef: - INDEX_t k, p, q, I, J - for p in range(contrib.shape[0]): - for q in range(self.vectorSize): - contrib[p, q] *= fac - k = 0 - for p in range(self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - for q in range(self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.vecA.addToEntry(I, J, contrib[k, :]) - k += 1 - else: - k += self.dm.dofs_per_element - - cdef inline void addToMatrixElemElemSymVector(self, REAL_t[:, ::1] contrib, REAL_t fac): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q, I, J - for p in range(contrib.shape[0]): - for q in range(self.vectorSize): - contrib[p, q] *= fac - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - self.vecA.addToEntry(I, I, contrib[k, :]) - k += 1 - for q in range(p+1, 2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.vecA.addToEntry(I, J, contrib[k, :]) - self.vecA.addToEntry(J, I, contrib[k, :]) - k += 1 - else: - k += 2*self.dm.dofs_per_element-p - - cdef inline void addToMatrixElemElemVector(self, REAL_t[:, ::1] contrib, REAL_t fac): - # Add general 'contrib' to elements i and j - cdef: - INDEX_t k, p, q, I, J - for p in range(contrib.shape[0]): - for q in range(self.vectorSize): - contrib[p, q] *= fac - k = 0 - for p in range(2*self.dm.dofs_per_element): - I = self.localDoFs[p] - if I >= 0: - for q in range(2*self.dm.dofs_per_element): - J = self.localDoFs[q] - if J >= 0: - self.vecA.addToEntry(I, J, contrib[k, :]) - k += 1 - else: - k += 2*self.dm.dofs_per_element - +include "nonlocalLaplacian_REAL.pxi" +include "nonlocalLaplacian_COMPLEX.pxi" # These functions are used by getEntry @@ -755,2036 +125,6 @@ cdef inline MASK_t getElemElemSymMask(DoFMap DoFMap, INDEX_t cellNo1, INDEX_t ce return mask -cdef inline REAL_t extractElemSymMasked(DoFMap DoFMap, const REAL_t[::1] contrib, REAL_t fac, 
MASK_t mask): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q - REAL_t s = 0. - k = 0 - for p in range(DoFMap.dofs_per_element): - for q in range(p, DoFMap.dofs_per_element): - if mask[k]: - s += fac*contrib[k] - k += 1 - return s - - -cdef inline REAL_t extractElemElemSymMasked(DoFMap DoFMap, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): - # Add symmetric 'contrib' to elements i and j in symmetric fashion - cdef: - INDEX_t k, p, q - REAL_t s = 0. - k = 0 - for p in range(2*DoFMap.dofs_per_element): - for q in range(p, 2*DoFMap.dofs_per_element): - if mask[k]: - s += fac*contrib[k] - k += 1 - return s - - -cdef class nonlocalBuilder: - def __init__(self, - meshBase mesh, - DoFMap dm, - Kernel kernel, - dict params={}, - bint zeroExterior=True, - MPI.Comm comm=None, - FakePLogger PLogger=None, - DoFMap dm2=None, - **kwargs): - cdef: - MASK_t mask - if 'boundary' in kwargs: - warnings.warn('"boundary" parameter deprecated', DeprecationWarning) - zeroExterior = kwargs['boundary'] - - self.dm = dm - self.mesh = self.dm.mesh - assert self.dm.mesh == mesh - if dm2 is not None: - self.dm2 = dm2 - assert type(self.dm) == type(self.dm2) - assert self.dm.mesh == self.dm2.mesh - self.kernel = kernel - if self.kernel.finiteHorizon: - self.zeroExterior = False - else: - self.zeroExterior = zeroExterior - self.comm = comm - self.params = params - - assert isinstance(self.kernel.horizon, constant) - assert kernel.dim == mesh.dim - assert kernel.dim == dm.mesh.dim - - # volume integral - self.local_matrix = self.getLocalMatrix(params) - - if self.local_matrix.symmetricLocalMatrix: - self.contrib = uninitialized(((2*self.dm.dofs_per_element)*(2*self.dm.dofs_per_element+1)//2), dtype=REAL) - else: - self.contrib = uninitialized(((2*self.dm.dofs_per_element)**2), dtype=REAL) - assert self.contrib.shape[0] <= mask.size(), "Mask type size = {} is not large enough for {} entries. 
Please set a larger size and recompile.".format(mask.size(), self.contrib.shape[0]) - - self.local_matrix.setMesh1(self.dm.mesh) - if self.dm2 is None: - self.local_matrix.setMesh2(self.dm.mesh) - else: - self.local_matrix.setMesh2(self.dm2.mesh) - - LOGGER.debug(self.local_matrix) - - # surface integrals - self.local_matrix_zeroExterior = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=True) - self.local_matrix_surface = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=False) - - if self.local_matrix_zeroExterior is not None: - self.local_matrix_zeroExterior.setMesh1(self.dm.mesh) - self.local_matrix_surface.setMesh1(self.dm.mesh) - if self.local_matrix_zeroExterior.symmetricLocalMatrix: - self.contribZeroExterior = uninitialized((self.dm.dofs_per_element*(self.dm.dofs_per_element+1)//2), dtype=REAL) - else: - self.contribZeroExterior = uninitialized(((self.dm.dofs_per_element)**2), dtype=REAL) - LOGGER.debug(self.local_matrix_zeroExterior) - LOGGER.debug(self.local_matrix_surface) - else: - self.contribZeroExterior = uninitialized((0), dtype=REAL) - - - if PLogger is not None: - self.PLogger = PLogger - else: - self.PLogger = FakePLogger() - - @property - def d2c(self): - if self._d2c is None: - self._d2c = self.dm.getPatchLookup() - return self._d2c - - cdef inline double_local_matrix_t getLocalMatrix(self, dict params): - cdef: - BOOL_t symmetric, forceNonSym - fractionalOrderBase s - target_order = params.get('target_order', None) - quadType = params.get('quadType', 'classical-refactored') - assert quadType in ( - 'classical-refactored' - ) - - forceNonSym = params.get('forceNonSym', False) - symmetric = not forceNonSym and self.kernel.symmetric - if quadType == 'classical-refactored': - if self.mesh.dim == 1: - if symmetric: - local_matrix = fractionalLaplacian1D(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - local_matrix = fractionalLaplacian1D_nonsym(self.kernel, - mesh=self.mesh, - 
DoFMap=self.dm, - target_order=target_order) - elif self.mesh.dim == 2: - if symmetric: - if not isinstance(self.dm, Product_DoFMap): - local_matrix = fractionalLaplacian2D(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() - else: - local_matrix = fractionalLaplacian2D_nonsym(self.kernel, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() - return local_matrix - - cdef inline double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon): - cdef: - fractionalOrderBase s - target_order = params.get('target_order', None) - if 'quadTypeBoundary' in params: - quadType = params['quadTypeBoundary'] - else: - quadType = params.get('quadType', 'classical-refactored') - assert quadType in ( - 'classical-refactored' - ) - - if isinstance(self.kernel, FractionalKernel): - s = self.kernel.s - assert ((s.min < 1.) and (s.max < 1.)) or ((s.min > 1.) and (s.max > 1.)) - assert isinstance(self.kernel.horizon, constant) - if infHorizon: - kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf)) - else: - kernelInfHorizon = self.kernel - if quadType == 'classical-refactored': - kernelBoundary = kernelInfHorizon.getBoundaryKernel() - if self.mesh.dim == 1: - local_matrix = fractionalLaplacian1D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - elif self.mesh.dim == 2: - if not isinstance(self.dm, Product_DoFMap): - local_matrix = fractionalLaplacian2D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() - else: - raise NotImplementedError() - else: - raise NotImplementedError() - else: - assert isinstance(self.kernel.horizon, constant) - if infHorizon: - kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf)) - else: - kernelInfHorizon = self.kernel - if quadType == 
'classical-refactored': - kernelBoundary = kernelInfHorizon.getBoundaryKernel() - if self.mesh.dim == 1: - local_matrix = fractionalLaplacian1D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - elif self.mesh.dim == 2: - if not isinstance(self.dm, Product_DoFMap): - local_matrix = fractionalLaplacian2D_boundary(kernelBoundary, - mesh=self.mesh, - DoFMap=self.dm, - target_order=target_order) - else: - raise NotImplementedError() - else: - raise NotImplementedError() - else: - local_matrix = None - return local_matrix - - def getSparse(self, BOOL_t returnNearField=False, str prefix=''): - cdef: - INDEX_t cellNo1, cellNo2 - REAL_t[::1] contrib = self.contrib - IndexManager iM - REAL_t fac - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells - panelType panel - BOOL_t ignoreDiagonalBlocks = False - BOOL_t doDistributedAssembly - LinearOperator A = None - BOOL_t useSymmetricMatrix - REAL_t[:, :, ::1] boxes = None - sparseGraph cells = None - REAL_t[:, ::1] coords = None - tree_node root, myRoot - list Pnear - nearFieldClusterPair cP - DoFMap treeDM - arrayIndexSet oldDoFs - indexSetIterator it - tree_node n - indexSetIterator cellIt1, cellIt2 - set newDoFs - INDEX_t dof_tree, dof, new_dof - INDEX_t[::1] translate - arrayIndexSet cells1, cells2 - sparsityPattern processedCellPairs - - if self.dm.mesh.dim == 1: - fac = 0.125 - else: - fac = 1. 
- self.params['minClusterSize'] = self.params.get('minClusterSize', int(fac*(self.kernel.horizonValue/self.dm.mesh.h)**self.dm.mesh.dim)) - refParams = self.getH2RefinementParams() - doDistributedAssembly = self.comm is not None and self.comm.size > 1 and self.dm.num_dofs > self.comm.size - forceUnsymmetric = self.params.get('forceUnsymmetric', doDistributedAssembly) - assembleOnRoot = self.params.get('assembleOnRoot', False) - localFarFieldIndexing = True - localFarFieldIndexing = doDistributedAssembly and not assembleOnRoot and localFarFieldIndexing - if doDistributedAssembly and not assembleOnRoot: - assert forceUnsymmetric - - # We want to capture all element x element interactions. - # We set up a temporary dofmap and construct a near field wrt that. - treeDM = dofmapFactory('P1', self.dm.mesh, -1) - with self.PLogger.Timer(prefix+'boxes, cells, coords'): - boxes, cells = getDoFBoxesAndCells(treeDM.mesh, treeDM, self.comm) - coords = treeDM.getDoFCoordinates() - - # construct the cluster tree - root, myRoot, _, doDistributedAssembly = self.getTree(doDistributedAssembly, refParams, boxes, cells, coords, allNearField=True, dm=treeDM) - - # get the covering cluster pairs - Pnear = self.getCoveringClusters(root, myRoot, doDistributedAssembly, refParams, boxes, cells, coords, assembleOnRoot=assembleOnRoot, ignoreDiagonalBlocks=ignoreDiagonalBlocks) - - # translate to original dofmap - translate = -np.ones((treeDM.num_dofs), dtype=INDEX) - for cellNo in range(treeDM.mesh.num_cells): - for dofNo in range(treeDM.dofs_per_element): - dof = self.dm.cell2dof(cellNo, dofNo) - if dof >= 0: - dof_tree = treeDM.cell2dof(cellNo, dofNo) - translate[dof_tree] = dof - - for n in root.leaves(): - oldDoFs = n._dofs - newDoFs = set() - it = oldDoFs.getIter() - while it.step(): - dof_tree = it.i - new_dof = translate[dof_tree] - if new_dof >= 0: - newDoFs.add(new_dof) - - if len(newDoFs) > 0: - newDoFsArray = np.array(list(newDoFs), dtype=INDEX) - n._dofs = 
arrayIndexSet(newDoFsArray) - else: - n._dofs = arrayIndexSet() - for n in root.get_tree_nodes(): - n._num_dofs = -1 - - Pnear_filtered = [] - for cP in Pnear: - if (cP.n1.get_num_dofs() > 0) or (cP.n2.get_num_dofs() > 0): - Pnear_filtered.append(cP) - Pnear = Pnear_filtered - - useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells and not forceUnsymmetric - - with self.PLogger.Timer(prefix+'build near field sparsity pattern'): - if myRoot is not None and doDistributedAssembly: - A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) - else: - A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) - - # We are not using assembleClusters because we don't want to use surface integration - with self.PLogger.Timer(prefix+'interior - compute'): - iM = IndexManager(self.dm, A) - processedCellPairs = sparsityPattern(self.dm.mesh.num_cells) - - for cP in Pnear: - cells1 = cP.n1.cells - cells2 = cP.n2.cells - cellIt1 = cells1.getIter() - cellIt2 = cells2.getIter() - while cellIt1.step(): - cellNo1 = cellIt1.i - self.local_matrix.setCell1(cellNo1) - cellIt2.reset() - while cellIt2.step(): - cellNo2 = cellIt2.i - if processedCellPairs.findIndex(cellNo1, cellNo2): - continue - processedCellPairs.add(cellNo1, cellNo2) - - processedCellPairs.add(cellNo1, cellNo2) - self.local_matrix.setCell2(cellNo2) - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) - else: - iM.addToMatrixElemElem(contrib, 1.) - else: - if symmetricCells: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) - # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). 
- if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 2.) - else: - iM.addToMatrixElemElem(contrib, 2.) - else: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) - else: - iM.addToMatrixElemElem(contrib, 1.) - self.local_matrix.swapCells() - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if iM.getDoFsElemElem(cellNo2, cellNo1): - continue - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) - else: - iM.addToMatrixElemElem(contrib, 1.) - self.local_matrix.swapCells() - - if doDistributedAssembly and assembleOnRoot: - with self.PLogger.Timer('reduceNearOp'): - A = self.reduceNearOp(A, myRoot.get_dofs()) - if localFarFieldIndexing: - _, local_dm, lclR, lclP = self.doLocalFarFieldIndexing(myRoot, boxes) - if self.comm is None or (assembleOnRoot and self.comm.rank == 0) or (not assembleOnRoot): - if self.comm is None or (assembleOnRoot and self.comm.rank == 0): - if returnNearField: - return A, Pnear - else: - return A - else: - with self.PLogger.Timer('setup distributed op'): - if not localFarFieldIndexing: - raise NotImplementedError() - else: - dist_A = DistributedLinearOperator(A, root, Pnear, self.comm, self.dm, local_dm, lclR, lclP) - if returnNearField: - return dist_A, Pnear - else: - return dist_A - else: - if returnNearField: - return A, Pnear - else: - return A - - def getDense(self, BOOL_t trySparsification=False): - cdef: - INDEX_t cellNo1, cellNo2 - LinearOperator A = None - REAL_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior - INDEX_t start, end - meshBase surface - IndexManager iM - INDEX_t i, j, explicitZerosRow - np.int64_t explicitZeros - REAL_t[:, ::1] data - REAL_t sparsificationThreshold = 0.8 - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells - MASK_t mask - - if self.comm: 
- start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) - end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) - else: - start = 0 - end = self.mesh.num_cells - - if (trySparsification - and (self.comm is None or self.comm.size == 1) - and not self.zeroExterior - and self.dm2 is None - and self.kernel.finiteHorizon - and (self.mesh.volume*(1.-sparsificationThreshold) > self.kernel.getHorizonValue()**self.mesh.dim)): - - with self.PLogger.Timer('build sparsity pattern'): - - sP = sparsityPattern(self.dm.num_dofs) - iM = IndexManager(self.dm, None, sP=sP) - - for cellNo1 in range(start, end): - self.local_matrix.setCell1(cellNo1) - for cellNo2 in range(cellNo1, self.mesh.num_cells): - self.local_matrix.setCell2(cellNo2) - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - if panel != IGNORED: - if self.local_matrix.symmetricLocalMatrix: - iM.addToSparsityElemElemSym() - else: - iM.addToSparsityElemElem() - else: - if self.local_matrix.symmetricCells: - if panel != IGNORED: - if self.local_matrix.symmetricLocalMatrix: - iM.addToSparsityElemElemSym() - else: - iM.addToSparsityElemElem() - else: - if panel != IGNORED: - if self.local_matrix.symmetricLocalMatrix: - iM.addToSparsityElemElemSym() - else: - iM.addToSparsityElemElem() - self.local_matrix.swapCells() - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if iM.getDoFsElemElem(cellNo2, cellNo1): - continue - if self.local_matrix.symmetricLocalMatrix: - iM.addToSparsityElemElemSym() - else: - iM.addToSparsityElemElem() - self.local_matrix.swapCells() - indptr, indices = sP.freeze() - useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells - if useSymmetricMatrix: - A = SSS_LinearOperator(indices, indptr, - np.zeros((indices.shape[0]), dtype=REAL), - np.zeros((self.dm.num_dofs), dtype=REAL)) - ratio = ((A.nnz+A.num_rows)/REAL(A.num_rows))/REAL(A.num_columns) - 
else: - A = CSR_LinearOperator(indices, indptr, - np.zeros((indices.shape[0]), dtype=REAL)) - ratio = (A.nnz/REAL(A.num_rows))/REAL(A.num_columns) - LOGGER.warning('Assembling into sparse{} matrix, since {}% of entries are zero.'.format(', symmetric' if useSymmetricMatrix else '', - 100.*(1.-ratio))) - trySparsification = False - else: - if self.dm2 is None: - A = Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs), dtype=REAL)) - else: - A = Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs), dtype=REAL)) - - if self.dm2 is None: - iM = IndexManager(self.dm, A) - else: - LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') - dmCombined = self.dm.combine(self.dm2) - B = SubMatrixAssemblyOperator(A, - np.arange(self.dm.num_dofs, dtype=INDEX), - np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) - iM = IndexManager(dmCombined, B) - - # Omega x Omega - with self.PLogger.Timer('interior'): - for cellNo1 in range(start, end): - self.local_matrix.setCell1(cellNo1) - for cellNo2 in range(cellNo1, self.mesh.num_cells): - self.local_matrix.setCell2(cellNo2) - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) - else: - iM.addToMatrixElemElem(contrib, 1.) - else: - if symmetricCells: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) - # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 2.) - else: - iM.addToMatrixElemElem(contrib, 2.) - else: - if panel != IGNORED: - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) 
- else: - iM.addToMatrixElemElem(contrib, 1.) - self.local_matrix.swapCells() - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if iM.getDoFsElemElem(cellNo2, cellNo1): - continue - self.local_matrix.eval(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSym(contrib, 1.) - else: - iM.addToMatrixElemElem(contrib, 1.) - self.local_matrix.swapCells() - - # Omega x Omega^C - if self.zeroExterior: - with self.PLogger.Timer('zeroExterior'): - surface = self.mesh.get_surface_mesh() - - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(start, end): - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - self.local_matrix_zeroExterior.setCell1(cellNo1) - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) - # if local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, 1.) 
- # else: - # raise NotImplementedError() - if self.comm: - self.comm.Allreduce(MPI.IN_PLACE, A.data) - if trySparsification: - explicitZeros = 0 - data = A.data - nr = A.num_rows - for i in range(A.num_rows): - explicitZerosRow = 0 - for j in range(A.num_columns): - if data[i, j] == 0.: - explicitZerosRow += 1 - explicitZeros += explicitZerosRow - if not (explicitZerosRow > sparsificationThreshold*A.num_columns): - nr = i+1 - break - ratio = (explicitZeros/REAL(nr))/REAL(A.num_columns) - if ratio > sparsificationThreshold: - LOGGER.warning('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*ratio)) - return CSR_LinearOperator.from_dense(A) - else: - LOGGER.warning('Not converting dense to sparse matrix, since only {}% of entries are zero.'.format(100.*ratio)) - return A - - def getDenseVector(self, BOOL_t trySparsification=False): - cdef: - INDEX_t cellNo1, cellNo2 - VectorLinearOperator A = None - REAL_t[:, ::1] contrib = self.contribVector, contribZeroExterior = self.contribZeroExteriorVector - INDEX_t start, end - meshBase surface - IndexManagerVector iM - INDEX_t i, j, explicitZerosRow - np.int64_t explicitZeros - REAL_t[:, ::1] data - REAL_t sparsificationThreshold = 0.8 - BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix - BOOL_t symmetricCells = self.local_matrix.symmetricCells - MASK_t mask - - if self.comm: - start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) - end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) - else: - start = 0 - end = self.mesh.num_cells - - - if self.dm2 is None: - A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs, self.kernel.vectorSize), dtype=REAL)) - else: - A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs, self.kernel.vectorSize), dtype=REAL)) - - if self.dm2 is None: - iM = IndexManagerVector(self.dm, A) - else: - LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') - dmCombined = 
self.dm.combine(self.dm2) - B = SubMatrixAssemblyOperator(A, - np.arange(self.dm.num_dofs, dtype=INDEX), - np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) - iM = IndexManagerVector(dmCombined, B) - - # Omega x Omega - with self.PLogger.Timer('interior'): - for cellNo1 in range(start, end): - self.local_matrix.setCell1(cellNo1) - for cellNo2 in range(cellNo1, self.mesh.num_cells): - self.local_matrix.setCell2(cellNo2) - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) - else: - if symmetricCells: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) - # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 2.) - else: - iM.addToMatrixElemElemVector(contrib, 2.) - else: - if panel != IGNORED: - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) - self.local_matrix.swapCells() - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if iM.getDoFsElemElem(cellNo2, cellNo1): - continue - self.local_matrix.evalVector(contrib, panel) - if symmetricLocalMatrix: - iM.addToMatrixElemElemSymVector(contrib, 1.) - else: - iM.addToMatrixElemElemVector(contrib, 1.) 
- self.local_matrix.swapCells() - - # Omega x Omega^C - if self.zeroExterior: - with self.PLogger.Timer('zeroExterior'): - surface = self.mesh.get_surface_mesh() - - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(start, end): - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - self.local_matrix_zeroExterior.setCell1(cellNo1) - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.evalVector(contribZeroExterior, panel, mask) - # if local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSymVector(contribZeroExterior, 1.) - # else: - # raise NotImplementedError() - if self.comm: - self.comm.Allreduce(MPI.IN_PLACE, A.data) - return A - - cpdef REAL_t getEntryCluster(self, INDEX_t I, INDEX_t J): - cdef: - tree_node n1, n2, n3 - list clusters = [] - nearFieldClusterPair c1, c2, c3 - arrayIndexSet aI1, aI2, aI3 - REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) - INDEX_t[::1] I_view = np.array([I], dtype=INDEX) - INDEX_t[::1] J_view = np.array([J], dtype=INDEX) - arrayIndexSetIterator it = arrayIndexSetIterator() - list d2c = self.d2c - LinearOperator A - REAL_t[:, ::1] mat = np.zeros((1, 1), dtype=REAL) - if I == J: - aI3 = arrayIndexSet(I_view) - n3 = tree_node(None, aI3, fake_boxes) - - cells = set() - it.setIndexSet(aI3) - while it.step(): - cells |= d2c[it.i] - n3._cells = arrayIndexSet() - n3._cells.fromSet(cells) - - c3 = nearFieldClusterPair(n3, n3) - c3.set_cells() - clusters.append(c3) - else: - aI1 = arrayIndexSet(I_view) - aI2 = arrayIndexSet(J_view) - n1 = tree_node(None, aI1, fake_boxes) - n2 = tree_node(None, aI2, fake_boxes) - - cells = set() - it.setIndexSet(aI1) - while it.step(): - cells |= d2c[it.i] - n1._cells = arrayIndexSet() - n1._cells.fromSet(cells) - - cells = set() - it.setIndexSet(aI2) - while it.step(): - cells |= d2c[it.i] - n2._cells = arrayIndexSet() - 
n2._cells.fromSet(cells) - - c1 = nearFieldClusterPair(n1, n2) - c1.set_cells() - c2 = nearFieldClusterPair(n2, n1) - c2.set_cells() - clusters.append(c1) - clusters.append(c2) - A = Dense_SubBlock_LinearOperator(I_view, - J_view, - self.dm.num_dofs, - self.dm.num_dofs, - mat) - self.assembleClusters(clusters, Anear=A) - return mat[0, 0] - - cpdef REAL_t getEntry(self, INDEX_t I, INDEX_t J): - cdef: - INDEX_t cellNo1, cellNo2 - INDEX_t[:,::1] surface_cells - MASK_t mask - indexSet cellsUnion = arrayIndexSet() - indexSet cellsInter = arrayIndexSet() - indexSetIterator it1 = arrayIndexSetIterator() - indexSetIterator it2 = arrayIndexSetIterator() - dm = self.dm - REAL_t entry = 0. - cellsUnion.fromSet(self.d2c[I] | self.d2c[J]) - cellsInter.fromSet(self.d2c[I] & self.d2c[J]) - - assert isinstance(self.kernel.horizon, constant) and self.kernel.horizon.value == np.inf - - # (supp phi_I \cup supp phi_J)^2 - it1.setIndexSet(cellsUnion) - it2.setIndexSet(cellsUnion) - while it1.step(): - cellNo1 = it1.i - self.local_matrix.setCell1(cellNo1) - it2.reset() - while it2.step(): - cellNo2 = it2.i - if cellNo2 < cellNo1: - continue - mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) - if mask.none(): - continue - self.local_matrix.setCell2(cellNo2) - panel = self.local_matrix.getPanelType() - if cellNo1 == cellNo2: - self.local_matrix.eval(self.contrib, panel, mask) - if self.local_matrix.symmetricLocalMatrix: - entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) - else: - raise NotImplementedError() - else: - if self.local_matrix.symmetricCells: - if panel != IGNORED: - self.local_matrix.eval(self.contrib, panel, mask) - # multiply by 2 to account for the contribution from cells (cellNo2, cellNo1) - if self.local_matrix.symmetricLocalMatrix: - entry += extractElemElemSymMasked(dm, self.contrib, 2., mask) - else: - raise NotImplementedError() - else: - if panel != IGNORED: - self.local_matrix.eval(self.contrib, panel, mask) - # multiply by 2 to account for the 
contribution from cells (cellNo2, cellNo1) - if self.local_matrix.symmetricLocalMatrix: - entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) - else: - raise NotImplementedError() - self.local_matrix.swapCells() - mask = getElemElemSymMask(dm, cellNo2, cellNo1, I, J) - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - self.local_matrix.eval(self.contrib, panel, mask) - if self.local_matrix.symmetricLocalMatrix: - entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) - else: - raise NotImplementedError() - # (supp phi_I \cup supp phi_J) x (supp phi_I \cup supp phi_J)^C - if not self.kernel.variable: - if self.zeroExterior: - # zeroExterior of (supp phi_I \cup supp phi_J) - if self.mesh.dim == 1: - surface_cells = boundaryVertices(self.mesh.cells, cellsUnion) - elif self.mesh.dim == 2: - surface_cells = boundaryEdges(self.mesh.cells, cellsUnion) - else: - raise NotImplementedError() - - self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) - - it1.setIndexSet(cellsInter) - while it1.step(): - cellNo1 = it1.i - self.local_matrix_zeroExterior.setCell1(cellNo1) - mask = getElemSymMask(dm, cellNo1, I, J) - for cellNo2 in range(surface_cells.shape[0]): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel) - entry += extractElemSymMasked(dm, self.contribZeroExterior, 1., mask) - else: - # (supp phi_I \cup supp phi_J) x (Omega \ (supp phi_I \cup supp phi_J)) - # TODO: This can be done using surface integrals instead - it1.setIndexSet(cellsUnion) - while it1.step(): - cellNo1 = it1.i - self.local_matrix.setCell1(cellNo1) - - for cellNo2 in set(range(self.mesh.num_cells))-cellsUnion.toSet(): - self.local_matrix.setCell2(cellNo2) - mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if 
self.local_matrix.symmetricLocalMatrix: - # multiply by 2 to account for the 2 symmetric contributions - self.local_matrix.eval(self.contrib, panel) - entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) - else: - raise NotImplementedError() - - if self.zeroExterior: - # (supp phi_I \cup supp phi_J) x Omega^C - surface = self.mesh.get_surface_mesh() - self.local_matrix_zeroExterior.setMesh2(surface) - - it1.setIndexSet(cellsInter) - while it1.step(): - cellNo1 = it1.i - self.local_matrix_zeroExterior.setCell1(cellNo1) - mask = getElemSymMask(dm, cellNo1, I, J) - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel) - entry += extractElemSymMasked(dm, self.contribZeroExterior, 1., mask) - return entry - - cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=False, LinearOperator Anear=None, dict jumps={}, str prefix='', tree_node myRoot=None, BOOL_t doDistributedAssembly=False): - cdef: - INDEX_t cellNo1, cellNo2, cellNo3 - REAL_t fac - REAL_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior - meshBase surface - INDEX_t[:, ::1] cells = self.mesh.cells, surface_cells, fake_cells - indexSet cellsInter - indexSet clusterDofs1, clusterDofs2 - FilteredAssemblyOperator Anear_filtered = None - INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) - nearFieldClusterPair cluster - panelType panel - tupleDictMASK masks = None - ENCODE_t hv, hv2 - MASK_t mask - # INDEX_t vertex1, vertex2 - bint useSymmetricMatrix - bint useSymmetricCells - INDEX_t vertexNo, i - INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) - REAL_t evalShift = 1e-9 - local_matrix_t mass - indexSetIterator it = arrayIndexSetIterator() - INDEX_t startCluster - INDEX_t numAssembledCells - indexSet myDofs = None - REAL_t sValuePre, sValuePost - BOOL_t surfaceIntegralNeedsShift - - 
mask.reset() - - if myRoot is not None: - myDofs = myRoot.get_dofs() - - if Anear is None: - useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells and not forceUnsymmetric - with self.PLogger.Timer(prefix+'build near field sparsity pattern'): - # TODO: double check that this should not be - if myRoot is not None and doDistributedAssembly: - Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) - else: - Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) - LOGGER.info('Anear: {}'.format(Anear)) - - if self.comm is not None and self.comm.size > 1: - nnz = Anear.nnz - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([nnz], dtype=INDEX), counts) - LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - Anear_filtered = FilteredAssemblyOperator(Anear) - - useSymmetricCells = self.local_matrix.symmetricCells - - iM = IndexManager(self.dm, Anear) - - use_masks = self.params.get('use_masks', True) - - with self.PLogger.Timer(prefix+'interior'): - # This corresponds to - # C(d,s) \int_D \int_D (u(x)-u(y)) (v(x)-v(y)) /|x-y|^{d+2s} - # where - # D = (supp u) \cup (supp v)., - # We only update unknowns that are in the cluster pair. - - if not use_masks: - # This loop does the correct thing, but we are wasting a lot of - # element x element evaluations. - for cluster in Pnear: - cellsUnion = cluster.cellsUnion - - clusterDofs1 = cluster.n1.get_dofs() - clusterDofs2 = cluster.n2.get_dofs() - Anear_filtered.setFilter(clusterDofs1, clusterDofs2) - iM = IndexManager(self.dm, Anear_filtered) - - for cellNo1 in cellsUnion: - self.local_matrix.setCell1(cellNo1) - for cellNo2 in cellsUnion: - self.local_matrix.setCell2(cellNo2) - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - if useSymmetricCells and (cellNo1 != cellNo2): - fac = 2. 
- else: - fac = 1. - if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - self.local_matrix.eval(contrib, panel) - if useSymmetricCells: - iM.addToMatrixElemElemSym(contrib, fac) - else: - iM.addToMatrixElemElem(contrib, fac) - else: - # Pre-record all element x element contributions. - # This way, we only assembly over each element x element pair once. - # We load balance the cells and only get the list for the local rank. - startCluster = 0 - numAssembledCells = 0 - while startCluster < len(Pnear): - with self.PLogger.Timer(prefix+'interior - build masks'): - masks = iM.buildMasksForClusters(Pnear, useSymmetricCells, &startCluster) - - if (masks.getSizeInBytes() >> 20) > 20: - LOGGER.info('element x element pairs {}, {} MB'.format(masks.nnz, masks.getSizeInBytes() >> 20)) - # Compute all element x element contributions - with self.PLogger.Timer(prefix+'interior - compute'): - masks.startIter() - while masks.next(cellPair, &mask): - cellNo1 = cellPair[0] - cellNo2 = cellPair[1] - self.local_matrix.setCell1(cellNo1) - self.local_matrix.setCell2(cellNo2) - panel = self.local_matrix.getPanelType() - if panel != IGNORED: - numAssembledCells += 1 - if useSymmetricCells and (cellNo1 != cellNo2): - fac = 2. - else: - fac = 1. 
- if iM.getDoFsElemElem(cellNo1, cellNo2): - continue - self.local_matrix.eval(contrib, panel, mask) - if useSymmetricCells: - iM.addToMatrixElemElemSymMasked(contrib, fac, mask) - else: - iM.addToMatrixElemElemMasked(contrib, fac, mask) - masks = None - if self.comm is not None and self.comm.size > 1: - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([numAssembledCells], dtype=INDEX), counts) - if self.comm.rank == 0: - LOGGER.info('Num assembled cells per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) - - if not self.kernel.variable: - if not self.kernel.complement: - with self.PLogger.Timer(prefix+'cluster zeroExterior'): - # This corresponds to - # C(d,s)/(2s) \int_D u(x) v(x) \int_E n.(x-y)/|x-y|^{d+2s} - # where - # D = (supp u) \cap (supp v) \subset E, - # E = \partial((supp u) \cup (supp v)). - # We only update unknowns that are in the cluster pair. 
- - iM = IndexManager(self.dm, Anear_filtered) - - for cluster in Pnear: - - cellsInter = cluster.cellsInter - if len(cellsInter) == 0: - continue - - clusterDofs1 = cluster.n1.get_dofs() - clusterDofs2 = cluster.n2.get_dofs() - - # surface of the union of clusters n1 and n2 - if self.mesh.dim == 1: - surface_cells = boundaryVertices(cells, cluster.cellsUnion) - elif self.mesh.dim == 2: - surface_cells = boundaryEdges(cells, cluster.cellsUnion) - else: - raise NotImplementedError() - - Anear_filtered.setFilter(clusterDofs1, clusterDofs2) - - self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) - - it.setIndexSet(cellsInter) - while it.step(): - cellNo1 = it.i - self.local_matrix_zeroExterior.setCell1(cellNo1) - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - for cellNo2 in range(surface_cells.shape[0]): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) - if self.local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, 1.) - else: - raise NotImplementedError() - if not self.zeroExterior and not self.kernel.finiteHorizon: - with self.PLogger.Timer(prefix+'zeroExterior'): - # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. - # This is for the regional fractional Laplacian. 
- surface = self.mesh.get_surface_mesh() - iM = IndexManager(self.dm, Anear, myDofs=myDofs) - - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(self.mesh.num_cells): - self.local_matrix_zeroExterior.setCell1(cellNo1) - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) - if self.local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, -1.) - else: - raise NotImplementedError() - elif not self.zeroExterior and self.kernel.finiteHorizon: - with self.PLogger.Timer(prefix+'zeroExterior'): - # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. - # This is for the regional fractional Laplacian. - - if self.mesh.dim == 1: - vol = 2 - elif self.mesh.dim == 2: - vol = 2*np.pi * self.kernel.horizonValue - else: - raise NotImplementedError() - x = np.zeros((self.mesh.dim), dtype=REAL) - y = np.zeros((self.mesh.dim), dtype=REAL) - y[0] = self.kernel.horizonValue - coeff = constant(-vol*self.local_matrix_zeroExterior.kernel(x, y)) - qr = simplexXiaoGimbutas(2, self.mesh.dim) - if self.mesh.dim == 1: - mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) - elif self.mesh.dim == 2: - mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) - else: - raise NotImplementedError() - - if myDofs is not None: - Anear_filtered2 = LeftFilteredAssemblyOperator(Anear) - Anear_filtered2.setFilter(myDofs) - assembleMatrix(self.mesh, self.dm, mass, A=Anear_filtered2) - else: - assembleMatrix(self.mesh, self.dm, mass, A=Anear) - - elif self.zeroExterior and not self.kernel.complement: - with self.PLogger.Timer(prefix+'zeroExterior'): - # Add the zeroExterior contribution for Omega x Omega^C. 
- surface = self.mesh.get_surface_mesh() - iM = IndexManager(self.dm, Anear, myDofs=myDofs) - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(self.mesh.num_cells): - self.local_matrix_zeroExterior.setCell1(cellNo1) - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) - iM.addToMatrixElemSym(contribZeroExterior, 1.) - - else: - surfaceIntegralNeedsShift = not isinstance(self.kernel.s, singleVariableUnsymmetricFractionalOrder) - - if not self.kernel.complement: - # This corresponds to - # \int_D \int_E u(x) v(x) C(d, s) / |x-y|^{d+2s} - # where - # D = (supp u) \cap (supp v) \subset E, - # E = Omega \ ((supp u) \cup (supp v)). - # We only update unknowns that are in the cluster pair. - with self.PLogger.Timer(prefix+'cluster exterior'): - iM = IndexManager(self.dm, Anear_filtered) - - fake_cells = uninitialized((1, self.mesh.dim), dtype=INDEX) - for cluster in Pnear: - - cellsInter = cluster.cellsInter - if len(cellsInter) == 0: - continue - - clusterDofs1 = cluster.n1.get_dofs() - clusterDofs2 = cluster.n2.get_dofs() - - Anear_filtered.setFilter(clusterDofs1, clusterDofs2) - - if not self.kernel.complement: - - # surface of the union of clusters n1 and n2 - if self.mesh.dim == 1: - surface_cells = boundaryVertices(cells, cluster.cellsUnion) - elif self.mesh.dim == 2: - surface_cells = boundaryEdges(cells, cluster.cellsUnion) - else: - raise NotImplementedError() - self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, surface_cells) - - it.setIndexSet(cellsInter) - while it.step(): - cellNo1 = it.i - self.local_matrix_surface.setCell1(cellNo1) - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - for cellNo2 in range(surface_cells.shape[0]): - self.local_matrix_surface.setCell2(cellNo2) - if 
surfaceIntegralNeedsShift: - if self.mesh.dim == 1: - if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: - self.local_matrix_surface.center2[0] += evalShift - else: - self.local_matrix_surface.center2[0] -= evalShift - elif self.mesh.dim == 2: - self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) - self.local_matrix_surface.center2[1] -= evalShift*(self.local_matrix_surface.simplex2[1, 0]-self.local_matrix_surface.simplex2[0, 0]) - panel = self.local_matrix_surface.getPanelType() - if panel != IGNORED: - self.local_matrix_surface.eval(contribZeroExterior, panel, mask) - # if self.local_matrix_surface.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, 1.) - # else: - # print('here', np.array(contribZeroExterior)) - # iM.addToMatrixElem(contribZeroExterior, 1.) - # integrate all the jump interfaces - for hv in jumps: - decode_edge(hv, cellPair) - if not (cluster.cellsUnion.inSet(cellPair[0]) or - cluster.cellsUnion.inSet(cellPair[1])): - if self.mesh.dim == 1: - fake_cells[0, 0] = jumps[hv] - elif self.mesh.dim == 2: - hv2 = jumps[hv] - decode_edge(hv2, edge) - for vertexNo in range(self.mesh.dim): - fake_cells[0, vertexNo] = edge[vertexNo] - else: - raise NotImplementedError() - self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, fake_cells) - self.local_matrix_surface.setCell2(0) - - if surfaceIntegralNeedsShift: - if self.mesh.dim == 1: - self.local_matrix_surface.center2[0] += evalShift - elif self.mesh.dim == 2: - self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) - self.local_matrix_surface.center2[1] += evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) - - it.setIndexSet(cellsInter) - while it.step(): - cellNo3 = it.i - self.local_matrix_surface.setCell1(cellNo3) - panel = 
self.local_matrix_surface.getPanelType() - if panel != IGNORED: - if self.mesh.dim == 1: - if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: - fac = 1. - else: - fac = -1. - else: - fac = 1. - self.local_matrix_surface.eval(contribZeroExterior, panel) - iM.getDoFsElem(cellNo3) - if self.local_matrix_surface.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, fac) - else: - iM.addToMatrixElem(contribZeroExterior, fac) - sValuePre = self.local_matrix_surface.kernel.sValue - - if surfaceIntegralNeedsShift: - if self.mesh.dim == 1: - self.local_matrix_surface.center2[0] -= 2.*evalShift - elif self.mesh.dim == 2: - self.local_matrix_surface.center2[0] -= 2.*evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) - self.local_matrix_surface.center2[1] -= 2.*evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) - - it.reset() - while it.step(): - cellNo3 = it.i - self.local_matrix_surface.setCell1(cellNo3) - panel = self.local_matrix_surface.getPanelType() - if panel != IGNORED: - if self.mesh.dim == 1: - if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: - fac = -1. - else: - fac = 1. - else: - fac = -1. - self.local_matrix_surface.eval(contribZeroExterior, panel) - iM.getDoFsElem(cellNo3) - # if self.local_matrix_surface.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, fac) - # else: - # iM.addToMatrixElem(contribZeroExterior, fac) - sValuePost = self.local_matrix_surface.kernel.sValue - if abs(sValuePre-sValuePost) < 1e-9: - print(np.array(self.local_matrix_surface.simplex2)) - assert False, "Jump of fractional order between elements is zero (Value = {}). 
Check that the mesh aligns with the jump in the fractional order.".format(sValuePre) - if not self.zeroExterior and not self.kernel.finiteHorizon: - with self.PLogger.Timer(prefix+'zeroExterior'): - # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. - # This is for the regional fractional Laplacian. - surface = self.mesh.get_surface_mesh() - iM = IndexManager(self.dm, Anear, myDofs=myDofs) - - self.local_matrix_zeroExterior.setMesh2(surface) - - for cellNo1 in range(self.mesh.num_cells): - self.local_matrix_zeroExterior.setCell1(cellNo1) - iM.getDoFsElem(cellNo1) - mask = iM.getElemSymMask() - for cellNo2 in range(surface.num_cells): - self.local_matrix_zeroExterior.setCell2(cellNo2) - if self.mesh.dim == 1: - if self.local_matrix_zeroExterior.center1[0] < self.local_matrix_zeroExterior.center2[0]: - self.local_matrix_zeroExterior.center2[0] += evalShift - else: - self.local_matrix_zeroExterior.center2[0] -= evalShift - elif self.mesh.dim == 2: - self.local_matrix_zeroExterior.center2[0] += evalShift*(self.local_matrix_zeroExterior.simplex2[1, 1]-self.local_matrix_zeroExterior.simplex2[0, 1]) - self.local_matrix_zeroExterior.center2[1] -= evalShift*(self.local_matrix_zeroExterior.simplex2[1, 0]-self.local_matrix_zeroExterior.simplex2[0, 0]) - panel = self.local_matrix_zeroExterior.getPanelType() - self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) - # if self.local_matrix_zeroExterior.symmetricLocalMatrix: - iM.addToMatrixElemSym(contribZeroExterior, -1.) - # else: - # iM.addToMatrixElem(contribZeroExterior, -1.) 
- elif not self.zeroExterior and self.kernel.finiteHorizon: - with self.PLogger.Timer(prefix+'zeroExterior'): - # Subtract the contribution for Omega x (\partial B_\delta(x)) - assert isinstance(self.kernel.horizon, constant) - self.local_matrix_zeroExterior.center2 = uninitialized((self.mesh.dim), dtype=REAL) - coeff = horizonSurfaceIntegral(self.local_matrix_zeroExterior.kernel, self.kernel.horizon.value) - qr = simplexXiaoGimbutas(2, self.mesh.dim) - if self.mesh.dim == 1: - mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) - elif self.mesh.dim == 2: - mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) - else: - raise NotImplementedError() - assembleMatrix(self.mesh, self.dm, mass, A=Anear) - - return Anear - - def reduceNearOp(self, LinearOperator Anear, indexSet myDofs): - cdef: - INDEX_t k = -1, kk, jj - INDEX_t[::1] A_indptr = Anear.indptr, A_indices = Anear.indices - REAL_t[::1] A_data = Anear.data, A_diagonal = None - INDEX_t[::1] indptr, indices - REAL_t[::1] data, diagonal = None - LinearOperator Aother - INDEX_t I, nnz - indexSetIterator it = myDofs.getIter() - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([Anear.nnz], dtype=INDEX), counts) - if self.comm.rank == 0: - LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) - # drop entries that are not in rows of myRoot.dofs - Anear = self.dropOffRank(Anear, myDofs) - - A_indptr = Anear.indptr - - # sum distribute matrices by stacking rows - indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) - for k in range(self.dm.num_dofs): - indptr[k+1] = A_indptr[k+1]-A_indptr[k] - if self.comm.rank == 0: - self.comm.Reduce(MPI.IN_PLACE, indptr, root=0) - else: - self.comm.Reduce(indptr, indptr, root=0) - - if self.comm.rank == 0: - for k in range(self.dm.num_dofs): - indptr[k+1] += indptr[k] - nnz = indptr[self.dm.num_dofs] - - 
indices = uninitialized((nnz), dtype=INDEX) - data = uninitialized((nnz), dtype=REAL) - if isinstance(Anear, SSS_LinearOperator): - diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) - - for p in range(self.comm.size): - if p == 0: - Aother = Anear - else: - Aother = self.comm.recv(source=p) - - A_indptr = Aother.indptr - A_indices = Aother.indices - A_data = Aother.data - - for I in range(self.dm.num_dofs): - kk = indptr[I] - for jj in range(A_indptr[I], A_indptr[I+1]): - indices[kk] = A_indices[jj] - data[kk] = A_data[jj] - kk += 1 - - if isinstance(Aother, SSS_LinearOperator): - A_diagonal = Aother.diagonal - for I in range(self.dm.num_dofs): - diagonal[I] += A_diagonal[I] - - if isinstance(Anear, SSS_LinearOperator): - Anear = SSS_LinearOperator(indices, indptr, data, diagonal) - else: - Anear = CSR_LinearOperator(indices, indptr, data) - else: - self.comm.send(Anear, dest=0) - self.comm.Barrier() - - if self.comm.rank != 0: - Anear = None - else: - LOGGER.info('Anear reduced: {}'.format(Anear)) - # Anear = self.comm.bcast(Anear, root=0) - return Anear - - def dropOffRank(self, LinearOperator Anear, indexSet myDofs): - cdef: - INDEX_t k = -1, kk, jj - INDEX_t[::1] A_indptr = Anear.indptr, A_indices = Anear.indices - REAL_t[::1] A_data = Anear.data, A_diagonal = None - INDEX_t[::1] indptr, indices - REAL_t[::1] data, diagonal = None - indexSetIterator it = myDofs.getIter() - # drop entries that are not in rows of myRoot.dofs - indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) - while it.step(): - k = it.i - indptr[k+1] = A_indptr[k+1]-A_indptr[k] - for k in range(self.dm.num_dofs): - indptr[k+1] += indptr[k] - indices = uninitialized((indptr[self.dm.num_dofs]), dtype=INDEX) - data = uninitialized((indptr[self.dm.num_dofs]), dtype=REAL) - it.reset() - while it.step(): - k = it.i - kk = indptr[k] - for jj in range(A_indptr[k], A_indptr[k+1]): - indices[kk] = A_indices[jj] - data[kk] = A_data[jj] - kk += 1 - if isinstance(Anear, SSS_LinearOperator): - 
A_diagonal = Anear.diagonal - diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) - it.reset() - while it.step(): - k = it.i - diagonal[k] = A_diagonal[k] - Anear = SSS_LinearOperator(indices, indptr, data, diagonal) - else: - Anear = CSR_LinearOperator(indices, indptr, data) - return Anear - - def getDiagonal(self): - cdef: - diagonalOperator D - INDEX_t I - INDEX_t start, end - D = diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) - if self.comm: - start = np.ceil(self.dm.num_dofs*self.comm.rank/self.comm.size) - end = np.ceil(self.dm.num_dofs*(self.comm.rank+1)/self.comm.size) - else: - start = 0 - end = self.dm.num_dofs - if self.kernel.variable: - for I in range(start, end): - D.setEntry(I, I, self.getEntryCluster(I, I)) - else: - for I in range(start, end): - D.setEntry(I, I, self.getEntry(I, I)) - if self.comm: - self.comm.Allreduce(MPI.IN_PLACE, D.data) - return D - - def getDiagonalCluster(self): - cdef: - diagonalOperator D - tree_node n - nearFieldClusterPair c - INDEX_t I - list clusters = [] - REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) - list d2c = self.d2c - D = diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) - for I in range(self.dm.num_dofs): - n = tree_node(None, set([I]), fake_boxes) - n._cells = d2c[I] - c = nearFieldClusterPair(n, n) - c.set_cells() - clusters.append(c) - D = self.assembleClusters(clusters, Anear=D) - if self.comm: - self.comm.Allreduce(MPI.IN_PLACE, D.data) - return D - - def getKernelBlocksAndJumps(self): - cdef: - meshBase mesh = self.mesh - DoFMap DoFMap = self.dm - fractionalOrderBase s = self.kernel.s - REAL_t[::1] orders = None - REAL_t[::1] dofOrders - REAL_t cellOrder - dict blocks - INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) - INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) - INDEX_t cellNo, dofNo, dof, cellNo1, cellNo2, vertexNo1, vertexNo2, vertex1, vertex2, i - ENCODE_t hv - REAL_t UNASSIGNED = -np.inf - if isinstance(s, piecewiseConstantFractionalOrder): 
- orders = P0_DoFMap(mesh).interpolate(s.blockIndicator) - else: - orders = P0_DoFMap(mesh).interpolate(s.diagonal()) - dofOrders = np.full((DoFMap.num_dofs), fill_value=UNASSIGNED, dtype=REAL) - for cellNo in range(mesh.num_cells): - cellOrder = orders[cellNo] - for dofNo in range(DoFMap.dofs_per_element): - dof = DoFMap.cell2dof(cellNo, dofNo) - if dof >= 0: - if dofOrders[dof] == UNASSIGNED: - dofOrders[dof] = cellOrder - elif dofOrders[dof] != INTERFACE_DOF: - if dofOrders[dof] != cellOrder: - dofOrders[dof] = INTERFACE_DOF - # blocks is a dict - # value fractional order -> set of dofs - # dofs at interfaces between different fractional orders are in blocks[INTERFACE_DOF] - blocks = {} - for dof in range(DoFMap.num_dofs): - try: - blocks[dofOrders[dof]].add(dof) - except KeyError: - blocks[dofOrders[dof]] = set([dof]) - LOGGER.debug('Block sizes: '+str({key: len(blocks[key]) for key in blocks})) - - # jumps is a dict of element interfaces where the kernel has a jump. - # in 1D: - # encoded cell pair -> vertex at the interface between cells - # in 2D: - # encoded cell pair -> encoded edge - jumps = {} - cellConnectivity = mesh.getCellConnectivity(mesh.dim) - for cellNo1 in range(mesh.num_cells): - for cellNo2 in cellConnectivity[cellNo1]: - if orders[cellNo1] != orders[cellNo2]: - sortEdge(cellNo1, cellNo2, cellPair) - hv = encode_edge(cellPair) - if mesh.dim == 1: - for vertexNo1 in range(mesh.dim+1): - vertex1 = mesh.cells[cellNo1, vertexNo1] - for vertexNo2 in range(mesh.dim+1): - vertex2 = mesh.cells[cellNo2, vertexNo2] - if vertex1 == vertex2: - jumps[hv] = vertex1 - break - else: - i = 0 - for vertexNo1 in range(mesh.dim+1): - vertex1 = mesh.cells[cellNo1, vertexNo1] - for vertexNo2 in range(mesh.dim+1): - vertex2 = mesh.cells[cellNo2, vertexNo2] - if vertex1 == vertex2: - edge[i] = vertex1 - i += 1 - break - hv2 = encode_edge(edge) - jumps[hv] = hv2 - return blocks, jumps - - def getTree(self, - BOOL_t doDistributedAssembly, - refinementParams refParams, 
- REAL_t[:, :, ::1] boxes, - sparseGraph cells, - REAL_t[:, ::1] coords, - BOOL_t allNearField=False, - DoFMap dm=None): - cdef: - INDEX_t num_cluster_dofs - dict blocks = {}, jumps = {} - indexSet dofs, clusterDofs, subDofs, blockDofs - indexSetIterator it - REAL_t key - tree_node root, myRoot, n - - if dm is None: - dm = self.dm - - with self.PLogger.Timer('prepare tree'): - dofs = arrayIndexSet(np.arange(dm.num_dofs, dtype=INDEX), sorted=True) - root = tree_node(None, dofs, boxes, mixed_node=allNearField) - - if doDistributedAssembly: - from PyNucleus_fem.meshPartitioning import PartitionerException - - try: - root.partition(dm, self.comm, boxes, canBeAssembled=not self.kernel.variable, mixed_node=allNearField, params=self.params) - except PartitionerException: - doDistributedAssembly = False - LOGGER.warning('Falling back to serial assembly') - # check again, in case partitioning failed - if doDistributedAssembly: - myRoot = root.children[self.comm.rank] - else: - myRoot = root - - if self.kernel.variable and not (self.kernel.variableOrder and isinstance(self.kernel.s, singleVariableUnsymmetricFractionalOrder)): - blocks, jumps = self.getKernelBlocksAndJumps() - if len(jumps) > 0: - my_id = root.get_max_id()+1 - for n in root.leaves(): - clusterDofs = n.get_dofs() - num_cluster_dofs = clusterDofs.getNumEntries() - num_dofs = 0 - children = [] - for key in sorted(blocks): - blockDofs = arrayIndexSet() - blockDofs.fromSet(blocks[key]) - subDofs = blockDofs.inter(clusterDofs) - if subDofs.getNumEntries() > 0: - num_dofs += subDofs.getNumEntries() - children.append(tree_node(n, subDofs, boxes, mixed_node=key == INTERFACE_DOF)) - children[len(children)-1].id = my_id - my_id += 1 - assert num_dofs == num_cluster_dofs, (num_dofs, num_cluster_dofs) - n.children = children - n._dofs = None - # node ids are otherwise incorrect - # assert not doDistributedAssembly, "Cannot assemble variable kernel in distributed mode" - else: - for n in root.leaves(): - n.canBeAssembled = 
True - LOGGER.info('Jumps: {}, Block sizes: {}, Leaf nodes: {}'.format(len(jumps), str({key: len(blocks[key]) for key in blocks}), len(list(root.leaves())))) - - if doDistributedAssembly: - if self.kernel.variable: - root.irregularLevelsOffset = root.numLevels-1 - else: - root.irregularLevelsOffset = 1 - else: - root.irregularLevelsOffset = 1 - - if refParams.maxLevels <= 0: - refParams.maxLevels = root.numLevels+refParams.maxLevels - - return root, myRoot, jumps, doDistributedAssembly - - def getAdmissibleClusters(self, - tree_node root, tree_node myRoot, - BOOL_t doDistributedAssembly, - refinementParams refParams, - REAL_t[:, :, ::1] boxes, - sparseGraph cells, - REAL_t[:, ::1] coords, - BOOL_t assembleOnRoot=True, - BOOL_t ignoreDiagonalBlocks=False): - cdef: - dict Pfar = {} - list Pnear = [] - INDEX_t lvl, id1, id2 - nearFieldClusterPair cPnear - farFieldClusterPair cP - tree_node n1, n - dict added - INDEX_t N - dict node_lookup - INDEX_t dof, k - set myCells - with self.PLogger.Timer('admissible clusters'): - if doDistributedAssembly: - if assembleOnRoot: - # we need all tree nodes to be already available when we gather the far field clusters - for n in root.leaves(): - n.refine(boxes, coords, refParams, recursive=True) - - for n in root.children: - if ignoreDiagonalBlocks and (n.id == myRoot.id): - pass - getAdmissibleClusters(self.local_matrix.kernel, myRoot, n, - refParams, - Pfar=Pfar, Pnear=Pnear, - boxes1=boxes, - coords1=coords, - boxes2=boxes, - coords2=coords) - - symmetrizeNearFieldClusters(Pnear) - - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) - LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) - LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), 
counts.max(), counts.argmax())) - - if assembleOnRoot: - # collect far field on rank 0 - farField = [] - for lvl in Pfar: - for cP in Pfar[lvl]: - # "lvl+1", since the ranks are children of the global root - farField.append((lvl+1, cP.n1.id, cP.n2.id)) - farField = np.array(farField, dtype=INDEX) - self.comm.Gather(np.array([farField.shape[0]], dtype=INDEX), counts) - if self.comm.rank == 0: - LOGGER.info('Far field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - N = 0 - for rank in range(self.comm.size): - N += counts[rank] - farFieldCollected = uninitialized((N, 3), dtype=INDEX) - counts *= 3 - else: - farFieldCollected = None - self.comm.Gatherv(farField, [farFieldCollected, (counts, None)], root=0) - del farField - - if self.comm.rank == 0: - Pfar = {} - added = {} - node_lookup = {} - for n1 in root.get_tree_nodes(): - node_lookup[n1.id] = n1 - for k in range(farFieldCollected.shape[0]): - lvl, id1, id2 = farFieldCollected[k, :] - cP = farFieldClusterPair(node_lookup[id1], - node_lookup[id2]) - try: - if (id1, id2) not in added[lvl]: - Pfar[lvl].append(cP) - added[lvl].add((id1, id2)) - except KeyError: - Pfar[lvl] = [cP] - added[lvl] = set([(id1, id2)]) - del farFieldCollected - else: - Pfar = {} - else: - getAdmissibleClusters(self.local_matrix.kernel, root, root, - refParams, - Pfar=Pfar, Pnear=Pnear, - boxes1=boxes, - coords1=coords, - boxes2=boxes, - coords2=coords) - - if self.params.get('trim', True): - trimTree(root, Pnear, Pfar, self.comm) - - # Enter cells in leaf nodes - it = arrayIndexSetIterator() - for n in root.leaves(): - myCells = set() - it.setIndexSet(n.dofs) - while it.step(): - dof = it.i - for k in range(cells.indptr[dof], - cells.indptr[dof+1]): - myCells.add(cells.indices[k]) - n._cells = arrayIndexSet() - n._cells.fromSet(myCells) - del cells - - # set the cells of the near field cluster pairs - for cPnear in Pnear: - cPnear.set_cells() - return 
Pnear, Pfar - - def getCoveringClusters(self, - tree_node root, tree_node myRoot, - BOOL_t doDistributedAssembly, - refinementParams refParams, - REAL_t[:, :, ::1] boxes, - sparseGraph cells, - REAL_t[:, ::1] coords, - BOOL_t assembleOnRoot=True, - BOOL_t ignoreDiagonalBlocks=False): - cdef: - list Pnear = [] - nearFieldClusterPair cPnear - tree_node n - INDEX_t dof, k - set myCells - with self.PLogger.Timer('covering clusters'): - if doDistributedAssembly: - if assembleOnRoot: - # we need all tree nodes to be already available when we gather the far field clusters - for n in root.leaves(): - n.refine(boxes, coords, refParams, recursive=True) - - for n in root.children: - if ignoreDiagonalBlocks and (n.id == myRoot.id): - pass - getCoveringClusters(self.local_matrix.kernel, myRoot, n, - refParams, - Pnear, - boxes1=boxes, - coords1=coords, - boxes2=boxes, - coords2=coords) - - symmetrizeNearFieldClusters(Pnear) - - counts = np.zeros((self.comm.size), dtype=INDEX) - self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) - LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) - LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) - - else: - getCoveringClusters(self.kernel, root, root, - refParams, - Pnear, - boxes1=boxes, - coords1=coords, - boxes2=boxes, - coords2=coords) - - if self.params.get('trim', True): - trimTree(root, Pnear, {}, self.comm) - - # Enter cells in leaf nodes - it = arrayIndexSetIterator() - for n in root.leaves(): - myCells = set() - it.setIndexSet(n.dofs) - while it.step(): - dof = it.i - for k in range(cells.indptr[dof], - cells.indptr[dof+1]): - myCells.add(cells.indices[k]) - n._cells = arrayIndexSet() - n._cells.fromSet(myCells) - del cells - - # set the cells of the near 
field cluster pairs - for cPnear in Pnear: - cPnear.set_cells() - - return Pnear - - def getH2RefinementParams(self): - cdef: - meshBase mesh = self.mesh - refinementParams refParams - REAL_t singularity = self.kernel.max_singularity - - target_order = self.local_matrix.target_order - refParams.eta = self.params.get('eta', 3.) - - iO = self.params.get('interpolation_order', None) - if iO is None: - loggamma = abs(np.log(0.25)) - refParams.interpolation_order = max(np.ceil((2*target_order+max(-singularity, 2))*abs(np.log(mesh.hmin/mesh.diam))/loggamma/3.), 2) - else: - refParams.interpolation_order = iO - mL = self.params.get('maxLevels', None) - if mL is None: - # maxLevels = max(int(np.around(np.log2(DoFMap.num_dofs)/mesh.dim-np.log2(refParams.interpolation_order))), 0) - refParams.maxLevels = 200 - else: - refParams.maxLevels = mL - refParams.maxLevelsMixed = refParams.maxLevels - mCS = self.params.get('minClusterSize', None) - if mCS is None: - refParams.minSize = refParams.interpolation_order**mesh.dim//2 - else: - refParams.minSize = mCS - if self.kernel.finiteHorizon: - refParams.minMixedSize = max(min(self.kernel.horizon.value//(2*mesh.h)-1, refParams.minSize), 1) - else: - refParams.minMixedSize = refParams.minSize - mFFBS = self.params.get('minFarFieldBlockSize', None) - if mFFBS is None: - # For this value, size(kernelInterpolant) == size(dense block) - # If we choose a smaller value for minFarFieldBlockSize, then we use more memory, - # but we might save time, since the assembly of a far field block is cheaper than a near field block. 
- refParams.farFieldInteractionSize = refParams.interpolation_order**(2*mesh.dim) - else: - refParams.farFieldInteractionSize = mFFBS - - rT = self.params.get('refinementType', 'MEDIAN') - refParams.refType = {'geometric': GEOMETRIC, - 'GEOMETRIC': GEOMETRIC, - 'median': MEDIAN, - 'MEDIAN': MEDIAN, - 'barycenter': BARYCENTER, - 'BARYCENTER': BARYCENTER}[rT] - - refParams.splitEveryDim = self.params.get('splitEveryDim', False) - - refParams.attemptRefinement = True - - return refParams - - def doLocalFarFieldIndexing(self, tree_node myRoot, REAL_t[:, :, ::1] boxes): - cdef: - meshBase mesh = self.mesh - REAL_t[:, :, ::1] local_boxes = None - INDEX_t local_dof, global_dof, k, new_dof, i, j - dict lookup - CSR_LinearOperator lclR = None, lclP = None - INDEX_t[::1] newDoFsArray - unsortedArrayIndexSet newDoFs - indexSetIterator it - DoFMap local_dm = None - tree_node n - arrayIndexSet oldDoFs - with self.PLogger.Timer('localFarFieldIndexing'): - lclDoFs = myRoot.dofs.toArray() - lclIndicator = self.dm.zeros() - lclIndicator.toarray()[lclDoFs] = 1. 
- split = dofmapSplitter(self.dm, {'lcl': lclIndicator}) - local_dm = split.getSubMap('lcl') - local_dm.inner = ip_distributed_nonoverlapping(self.comm) - local_dm.norm = norm_distributed_nonoverlapping(self.comm) - lclR, lclP = split.getRestrictionProlongation('lcl') - lookup = {} - for local_dof in range(local_dm.num_dofs): - global_dof = lclR.indices[local_dof] - lookup[global_dof] = local_dof - for n in myRoot.leaves(): - oldDoFs = n._dofs - newDoFsArray = uninitialized((oldDoFs.getNumEntries()), dtype=INDEX) - k = 0 - it = oldDoFs.getIter() - while it.step(): - dof = it.i - new_dof = lookup[dof] - newDoFsArray[k] = new_dof - k += 1 - newDoFs = unsortedArrayIndexSet(newDoFsArray) - n._local_dofs = newDoFs - local_boxes = uninitialized((local_dm.num_dofs, mesh.dim, 2), dtype=REAL) - for local_dof in range(local_dm.num_dofs): - global_dof = lclR.indices[local_dof] - for i in range(mesh.dim): - for j in range(2): - local_boxes[local_dof, i, j] = boxes[global_dof, i, j] - return local_boxes, local_dm, lclR, lclP - - def getH2(self, BOOL_t returnNearField=False, returnTree=False, tree_node root=None, tree_node myRoot=None, dict jumps={}, BOOL_t ignoreDiagonalBlocks=False): - cdef: - meshBase mesh = self.mesh - DoFMap DoFMap = self.dm - REAL_t[:, :, ::1] boxes = None, local_boxes - sparseGraph cells = None - REAL_t[:, ::1] coords = None - dict Pfar - list Pnear - LinearOperator h2 = None, Anear = None - BOOL_t forceUnsymmetric, doDistributedAssembly = False, assembleOnRoot = True, localFarFieldIndexing = False - refinementParams refParams - CSR_LinearOperator lclR - - refParams = self.getH2RefinementParams() - - forceUnsymmetric = self.params.get('forceUnsymmetric', False) - doDistributedAssembly = self.comm is not None and self.comm.size > 1 and DoFMap.num_dofs > self.comm.size - assembleOnRoot = self.params.get('assembleOnRoot', True) - localFarFieldIndexing = self.params.get('localFarFieldIndexing', False) - localFarFieldIndexing = doDistributedAssembly and not 
assembleOnRoot and localFarFieldIndexing - if doDistributedAssembly and not assembleOnRoot: - assert forceUnsymmetric - - with self.PLogger.Timer('boxes, cells, coords'): - boxes, cells = getDoFBoxesAndCells(self.dm.mesh, self.dm, self.comm) - coords = self.dm.getDoFCoordinates() - - # construct the cluster tree - if root is None: - root, myRoot, jumps, doDistributedAssembly = self.getTree(doDistributedAssembly, refParams, boxes, cells, coords) - - # get the admissible cluster pairs - Pnear, Pfar = self.getAdmissibleClusters(root, myRoot, doDistributedAssembly, refParams, boxes, cells, coords, assembleOnRoot=assembleOnRoot, ignoreDiagonalBlocks=ignoreDiagonalBlocks) - lenPfar = len(Pfar) - if doDistributedAssembly: - lenPfar = self.comm.bcast(lenPfar) - - if lenPfar > 0: - LOGGER.info('interpolation_order: {}, maxLevels: {}, minClusterSize: {}, minMixedClusterSize: {}, minFarFieldBlockSize: {}, eta: {}'.format(refParams.interpolation_order, - refParams.maxLevels, - refParams.minSize, - refParams.minMixedSize, - refParams.farFieldInteractionSize, - refParams.eta)) - - # get near field matrix - with self.PLogger.Timer('near field'): - Anear = self.assembleClusters(Pnear, jumps=jumps, forceUnsymmetric=forceUnsymmetric, myRoot=myRoot, doDistributedAssembly=doDistributedAssembly) - if doDistributedAssembly and assembleOnRoot: - with self.PLogger.Timer('reduceNearOp'): - Anear = self.reduceNearOp(Anear, myRoot.get_dofs()) - - if localFarFieldIndexing: - local_boxes, local_dm, lclR, lclP = self.doLocalFarFieldIndexing(myRoot, boxes) - - with self.PLogger.Timer('leaf values'): - # get leave values - if self.kernel.max_singularity > -self.kernel.dim-2: - if not localFarFieldIndexing: - root.enterLeafValues(mesh, DoFMap, refParams.interpolation_order, boxes, self.comm, assembleOnRoot=assembleOnRoot) - else: - myRoot.enterLeafValues(mesh, local_dm, refParams.interpolation_order, local_boxes, local=True) - elif (self.kernel.min_singularity < -self.kernel.dim-2) and 
(self.kernel.max_singularity > -self.kernel.dim-4): - if not localFarFieldIndexing: - root.enterLeafValuesGrad(mesh, DoFMap, refParams.interpolation_order, boxes, self.comm) - else: - raise NotImplementedError() - else: - raise NotImplementedError() - - if self.comm is None or (assembleOnRoot and self.comm.rank == 0) or (not assembleOnRoot): - with self.PLogger.Timer('far field'): - # get kernel interpolations - assembleFarFieldInteractions(self.local_matrix.kernel, Pfar, refParams.interpolation_order, DoFMap) - - with self.PLogger.Timer('transfer matrices'): - # get transfer matrices - root.prepareTransferOperators(refParams.interpolation_order) - - if self.comm is None or (assembleOnRoot and self.comm.rank == 0): - h2 = H2Matrix(root, Pfar, Anear) - else: - with self.PLogger.Timer('setup distributed op'): - local_h2 = H2Matrix(root, Pfar, Anear) - if not localFarFieldIndexing: - h2 = DistributedH2Matrix_globalData(local_h2, self.comm) - else: - h2 = DistributedH2Matrix_localData(local_h2, Pnear, self.comm, self.dm, local_dm, lclR, lclP) - else: - h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) - LOGGER.info('{}'.format(h2)) - elif len(Pnear) == 0: - h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) - else: - LOGGER.info('Cannot assemble H2 operator, assembling dense matrix instead') - with self.PLogger.Timer('dense operator'): - h2 = self.getDense() - if returnNearField: - if returnTree: - return h2, Pnear, root - else: - return h2, Pnear - else: - if returnTree: - return h2, root - else: - return h2 - - def getH2FiniteHorizon(self, LinearOperator Ainf=None): - A = horizonCorrected(self.mesh, self.dm, self.kernel, self.comm, Ainf, logging=isinstance(self.PLogger, (PLogger, LoggingPLogger))) - return A - - cdef class horizonSurfaceIntegral(function): # x -> \int_{B_2(x, horizon)} kernel(x,y) dy cdef: diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd b/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd index 3c14fca2..8fbec4dd 100644 --- 
a/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd @@ -6,7 +6,7 @@ ################################################################################### cimport numpy as np -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t, BOOL_t from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadratureRule, doubleSimplexQuadratureRule, GaussJacobi, simplexDuffyTransformation, simplexXiaoGimbutas, @@ -28,6 +28,7 @@ from . fractionalOrders cimport (fractionalOrderBase, constFractionalOrder, variableFractionalOrder) from . kernelsCy cimport (Kernel, + ComplexKernel, FractionalKernel) from . clusterMethodCy cimport tree_node ctypedef INDEX_t panelType @@ -43,89 +44,107 @@ cdef class PermutationIndexer: cdef INDEX_t rank(self, INDEX_t[::1] perm) -cdef class double_local_matrix_t: - cdef: - REAL_t[:, ::1] vertices1, vertices2 - INDEX_t[:, ::1] cells1, cells2 - public dict distantQuadRules - public DoFMap DoFMap - INDEX_t dim - public bint symmetricLocalMatrix - public bint symmetricCells - public INDEX_t cellNo1, cellNo2 - REAL_t[:, :, ::1] precomputedSimplices - REAL_t[:, ::1] precomputedCenters - REAL_t[::1] precomputedVolumes - REAL_t[::1] precomputedH - REAL_t[:, ::1] simplex1, simplex2 - REAL_t[::1] center1, center2 - volume_t volume1, volume2 - public REAL_t vol1, vol2 - panelType IDENTICAL - REAL_t dmin2, dmax2, dcenter2 - REAL_t h1MaxInv, h2MaxInv, dMaxInv - PermutationIndexer pI_volume, pI_surface - public INDEX_t[::1] perm1, perm2, perm - public INDEX_t[:, ::1] precomputedVolumeSimplexPermutations - public INDEX_t[:, ::1] precomputedSurfaceSimplexPermutations - public INDEX_t[:, ::1] precomputedDoFPermutations - cdef void precomputePermutations(self) - cdef void precomputeSimplices(self) - cdef INDEX_t getCellPairIdentifierSize(self) - cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm) - cdef void 
computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm) - cdef void setMesh1(self, meshBase mesh1) - cdef void setMesh2(self, meshBase mesh2) - cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1) - cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2) - cdef void setCell1(self, INDEX_t cellNo1) - cdef void setCell2(self, INDEX_t cellNo2) - cdef void setSimplex1(self, REAL_t[:, ::1] simplex1) - cdef void setSimplex2(self, REAL_t[:, ::1] simplex2) - cdef void swapCells(self) - cdef void eval(self, - REAL_t[::1] contrib, - panelType panel, - MASK_t mask=*) - cdef void evalVector(self, - REAL_t[:, ::1] contrib, - panelType panel, - MASK_t mask=*) - cdef panelType getQuadOrder(self, - const REAL_t h1, - const REAL_t h2, - REAL_t d) - cdef panelType getProtoPanelType(self) - cdef void computeCenterDistance(self) - cdef void computeExtremeDistances(self) - cpdef panelType getPanelType(self) - cdef void addQuadRule(self, panelType panel) - cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex) - cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex) - cdef void getSimplexCenter(self, - const REAL_t[:, ::1] simplex, - REAL_t[::1] center) - - -cdef class nonlocalLaplacian(double_local_matrix_t): - cdef: - public REAL_t H0, hmin, num_dofs - void* localShapeFunctions - dict specialQuadRules - public Kernel kernel - REAL_t[:, ::1] x, y - void** distantQuadRulesPtr - REAL_t[::1] temp, temp2 - public REAL_t[::1] n, w - cdef void getNearQuadRule(self, panelType panel) - cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof) - cdef void addQuadRule(self, panelType panel) - cdef void addQuadRule_nonSym(self, panelType panel) - cdef void addQuadRule_boundary(self, panelType panel) - cdef void getNonSingularNearQuadRule(self, panelType panel) - cdef void eval_distant(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) - cdef void eval_distant_nonsym(self, REAL_t[::1] 
contrib, panelType panel, MASK_t mask=*) - cdef void eval_distant_boundary(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +include "nonlocalLaplacianBase_decl_REAL.pxi" +include "nonlocalLaplacianBase_decl_COMPLEX.pxi" + + +# cdef class double_local_matrix_t: +# cdef: +# REAL_t[:, ::1] vertices1, vertices2 +# INDEX_t[:, ::1] cells1, cells2 +# public dict distantQuadRules +# public DoFMap DoFMap +# INDEX_t dim +# INDEX_t manifold_dim1, manifold_dim2 +# public bint symmetricLocalMatrix +# public bint symmetricCells +# public INDEX_t cellNo1, cellNo2 +# REAL_t[:, :, ::1] precomputedSimplices +# REAL_t[:, ::1] precomputedCenters +# REAL_t[::1] precomputedVolumes +# REAL_t[::1] precomputedH +# REAL_t[:, ::1] simplex1, simplex2 +# REAL_t[::1] center1, center2 +# volume_t volume1, volume2 +# public REAL_t vol1, vol2 +# panelType IDENTICAL +# REAL_t dmin2, dmax2, dcenter2 +# REAL_t h1MaxInv, h2MaxInv, dMaxInv +# PermutationIndexer pI_volume, pI_surface +# public INDEX_t[::1] perm1, perm2, perm +# public INDEX_t[:, ::1] precomputedVolumeSimplexPermutations +# public INDEX_t[:, ::1] precomputedSurfaceSimplexPermutations +# public INDEX_t[:, ::1] precomputedDoFPermutations +# cdef void precomputePermutations(self) +# cdef void precomputeSimplices(self) +# cdef INDEX_t getCellPairIdentifierSize(self) +# cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm) +# cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm) +# cdef void setMesh1(self, meshBase mesh1) +# cdef void setMesh2(self, meshBase mesh2) +# cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1) +# cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2) +# cdef void setCell1(self, INDEX_t cellNo1) +# cdef void setCell2(self, INDEX_t cellNo2) +# cdef void setSimplex1(self, REAL_t[:, ::1] simplex1) +# cdef void setSimplex2(self, REAL_t[:, ::1] simplex2) +# cdef void swapCells(self) +# cdef void eval(self, +# 
REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=*) +# cdef void evalVector(self, +# REAL_t[:, ::1] contrib, +# panelType panel, +# MASK_t mask=*) +# cdef panelType getQuadOrder(self, +# const REAL_t h1, +# const REAL_t h2, +# REAL_t d) +# cdef panelType getProtoPanelType(self) +# cdef void computeCenterDistance(self) +# cdef void computeExtremeDistances(self) +# cpdef panelType getPanelType(self) +# cdef void addQuadRule(self, panelType panel) +# cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex) +# cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex) +# cdef void getSimplexCenter(self, +# const REAL_t[:, ::1] simplex, +# REAL_t[::1] center) + + +# cdef class nonlocalLaplacian(double_local_matrix_t): +# cdef: +# public REAL_t H0, hmin, num_dofs +# void* localShapeFunctions +# dict specialQuadRules +# public Kernel kernel +# REAL_t[:, ::1] x, y +# void** distantQuadRulesPtr +# REAL_t[::1] temp, temp2 +# # private +# REAL_t[::1] vec +# REAL_t[::1] vec2 +# REAL_t[:, ::1] tempVector +# REAL_t[:, ::1] tempVector2 +# # release +# public REAL_t[::1] n, w +# cdef void getNearQuadRule(self, panelType panel) +# cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof) +# cdef void addQuadRule(self, panelType panel) +# cdef void addQuadRule_nonSym(self, panelType panel) +# cdef void addQuadRule_boundary(self, panelType panel) +# cdef void getNonSingularNearQuadRule(self, panelType panel) +# cdef void eval_distant(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +# cdef void eval_distant_nonsym(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +# cdef void eval_distant_boundary(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +# # private +# cdef void eval_distant_tensor(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +# cdef void eval_distant_tensor_boundary(self, REAL_t[::1] contrib, panelType panel, MASK_t mask=*) +# cdef void eval_distant_vector(self, REAL_t[:, ::1] contrib, 
panelType panel, MASK_t mask=*) +# cdef void eval_distant_nonsym_vector(self, REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=*) +# cdef void eval_distant_boundary_vector(self, REAL_t[:, ::1] contrib, panelType panel, MASK_t mask=*) +# # release cdef class specialQuadRule: diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx b/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx index f78052a9..3750257a 100644 --- a/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx @@ -11,7 +11,7 @@ from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan2, log, ceil, fabs as abs, M_PI as pi, pow, tgamma as gamma) -from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE, BOOL from PyNucleus_base import uninitialized from PyNucleus_base.blas cimport mydot from libc.stdlib cimport malloc, free @@ -82,412 +82,9 @@ cdef class PermutationIndexer: return self.rank(perm) -cdef class double_local_matrix_t: - def __init__(self, INDEX_t dim, INDEX_t manifold_dim1, INDEX_t manifold_dim2, DoFMap dm): - self.distantQuadRules = {} - self.dim = dim - self.symmetricLocalMatrix = True - self.symmetricCells = True - self.cellNo1 = -1 - self.cellNo2 = -1 - self.vol1 = np.nan - self.vol2 = np.nan - - self.DoFMap = dm - self.precomputePermutations() - - if dim == 1: - self.volume1 = volume1Dsimplex - elif dim == 2: - self.volume1 = volume2Dsimplex - elif dim == 3: - self.volume1 = volume3Dsimplex - else: - raise NotImplementedError() - - if dim == 1 and manifold_dim2 == 1: - self.volume2 = volume1Dsimplex - elif dim == 1 and manifold_dim2 == 0: - self.volume2 = volume0Dsimplex - elif dim == 2 and manifold_dim2 == 2: - self.volume2 = volume2Dsimplex - elif dim == 2 and manifold_dim2 == 1: - self.volume2 = volume1Din2Dsimplex - elif dim == 3 and manifold_dim2 == 3: - self.volume2 = volume3Dsimplex - elif dim == 3 and manifold_dim2 == 2: - self.volume2 = volume2Din3Dsimplex - else: - raise 
NotImplementedError() - - if self.dim == 1: - self.IDENTICAL = COMMON_EDGE - elif self.dim == 2: - self.IDENTICAL = COMMON_FACE - elif self.dim == 3: - self.IDENTICAL = COMMON_VOLUME - else: - raise NotImplementedError() - self.center1 = uninitialized((self.dim), dtype=REAL) - self.center2 = uninitialized((self.dim), dtype=REAL) - - cdef void precomputePermutations(self): - cdef: - INDEX_t[:, ::1] perms, surface_perms - INDEX_t r, j, dofPerm, dofOrig, index, k - tuple permTuple - INDEX_t[::1] perm - REAL_t eps = 1e-10 - INDEX_t dim = self.DoFMap.mesh.dim - - perms = uninitialized((np.math.factorial(dim+1), dim+1), dtype=INDEX) - surface_perms = uninitialized((np.math.factorial(dim), dim), dtype=INDEX) - - from itertools import permutations - - self.pI_volume = PermutationIndexer(dim+1) - for permTuple in permutations(range(dim+1)): - perm = np.array(permTuple, dtype=INDEX) - index = self.pI_volume.rank(perm) - for k in range(dim+1): - perms[index, k] = perm[k] - - self.pI_surface = PermutationIndexer(dim) - for permTuple in permutations(range(dim)): - perm = np.array(permTuple, dtype=INDEX) - index = self.pI_surface.rank(perm) - for k in range(dim): - surface_perms[index, k] = perm[k] - - self.precomputedVolumeSimplexPermutations = perms - self.precomputedSurfaceSimplexPermutations = surface_perms - self.precomputedDoFPermutations = uninitialized((perms.shape[0], - self.DoFMap.dofs_per_element), dtype=INDEX) - for r in range(perms.shape[0]): - for dofPerm in range(self.DoFMap.dofs_per_element): - for dofOrig in range(self.DoFMap.dofs_per_element): - for j in range(dim+1): - if abs(self.DoFMap.nodes[dofPerm, j]-self.DoFMap.nodes[dofOrig, perms[r, j]]) > eps: - break - else: - self.precomputedDoFPermutations[r, dofPerm] = dofOrig - break - else: - # We should never get here - raise NotImplementedError() - - cdef void precomputeSimplices(self): - # mesh1 and mesh 2 will be the same - cdef: - INDEX_t cellNo1 - INDEX_t m, k, l - REAL_t fac = 1./self.cells1.shape[1] - 
self.precomputedSimplices = uninitialized((self.cells1.shape[0], self.cells1.shape[1], self.dim), dtype=REAL) - self.precomputedCenters = np.zeros((self.cells1.shape[0], self.dim), dtype=REAL) - for cellNo1 in range(self.cells1.shape[0]): - for m in range(self.cells1.shape[1]): - k = self.cells1[cellNo1, m] - for l in range(self.vertices1.shape[1]): - self.precomputedSimplices[cellNo1, m, l] = self.vertices1[k, l] - self.precomputedCenters[cellNo1, l] += self.vertices1[k, l] - for l in range(self.vertices1.shape[1]): - self.precomputedCenters[cellNo1, l] *= fac - - cdef INDEX_t getCellPairIdentifierSize(self): - return -1 - - cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm): - raise NotImplementedError() - - cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): - self.computeCellPairIdentifierBase(ID, perm) - - def computeCellPairIdentifier_py(self): - cdef: - INDEX_t perm = 0 - ID = uninitialized((self.getCellPairIdentifierSize()), dtype=INDEX) - self.computeCellPairIdentifier(ID, &perm) - return ID, perm - - cdef void setMesh1(self, meshBase mesh1): - self.setVerticesCells1(mesh1.vertices, mesh1.cells) - self.precomputedVolumes = mesh1.volVector - self.precomputedH = mesh1.hVector - h1 = 2.*mesh1.h - d = 2.*mesh1.diam - self.h1MaxInv = 1./h1 - self.dMaxInv = 1./d - - cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1): - self.vertices1 = vertices1 - self.cells1 = cells1 - self.simplex1 = uninitialized((self.cells1.shape[1], self.dim), dtype=REAL) - self.perm1 = uninitialized((self.cells1.shape[1]), dtype=INDEX) - self.cellNo1 = -1 - self.cellNo2 = -1 - if self.symmetricCells: - # mesh1 and mesh 2 will be the same - self.precomputeSimplices() - - cdef void setMesh2(self, meshBase mesh2): - self.setVerticesCells2(mesh2.vertices, mesh2.cells) - if mesh2.manifold_dim > 0: - h2 = 2.*mesh2.h - self.h2MaxInv = 1./h2 - else: - self.h2MaxInv = 1. 
- - cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2): - self.vertices2 = vertices2 - self.cells2 = cells2 - self.simplex2 = uninitialized((self.cells2.shape[1], self.dim), dtype=REAL) - self.perm2 = uninitialized((self.cells2.shape[1]), dtype=INDEX) - self.perm = uninitialized((2*self.DoFMap.dofs_per_element), dtype=INDEX) - self.cellNo1 = -1 - self.cellNo2 = -1 - - cdef void setCell1(self, INDEX_t cellNo1): - if self.cellNo1 == cellNo1: - return - self.cellNo1 = cellNo1 - if not self.symmetricCells: - getSimplexAndCenter(self.cells1, self.vertices1, self.cellNo1, self.simplex1, self.center1) - self.vol1 = self.volume1(self.simplex1) - else: - self.simplex1 = self.precomputedSimplices[cellNo1, :, :] - self.center1 = self.precomputedCenters[cellNo1, :] - self.vol1 = self.precomputedVolumes[cellNo1] - - cdef void setCell2(self, INDEX_t cellNo2): - if self.cellNo2 == cellNo2: - return - self.cellNo2 = cellNo2 - if not self.symmetricCells: - getSimplexAndCenter(self.cells2, self.vertices2, self.cellNo2, self.simplex2, self.center2) - self.vol2 = self.volume2(self.simplex2) - else: - self.simplex2 = self.precomputedSimplices[cellNo2, :, :] - self.center2 = self.precomputedCenters[cellNo2, :] - self.vol2 = self.precomputedVolumes[cellNo2] - - def setMesh1_py(self, meshBase mesh1): - self.setMesh1(mesh1) - - def setMesh2_py(self, meshBase mesh2): - self.setMesh2(mesh2) - - def setCell1_py(self, INDEX_t cellNo1): - self.setCell1(cellNo1) - - def setCell2_py(self, INDEX_t cellNo2): - self.setCell2(cellNo2) - - cdef void swapCells(self): - self.cellNo1, self.cellNo2 = self.cellNo2, self.cellNo1 - self.simplex1, self.simplex2 = self.simplex2, self.simplex1 - self.center1, self.center2 = self.center2, self.center1 - - cdef void setSimplex1(self, REAL_t[:, ::1] simplex1): - self.simplex1 = simplex1 - self.getSimplexCenter(self.simplex1, self.center1) - self.vol1 = self.volume1(self.simplex1) - - cdef void setSimplex2(self, REAL_t[:, ::1] 
simplex2): - self.simplex2 = simplex2 - self.getSimplexCenter(self.simplex2, self.center2) - self.vol2 = self.volume2(self.simplex2) - - def __call__(self, - REAL_t[::1] contrib, - panelType panel): - return self.eval(contrib, panel) - - cdef void eval(self, - REAL_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - raise NotImplementedError() - - cdef void evalVector(self, - REAL_t[:, ::1] contrib, - panelType panel, - MASK_t mask=ALL): - raise NotImplementedError() - - def eval_py(self, - REAL_t[::1] contrib, - panel): - self.eval(contrib, panel, ALL) - - def evalVector_py(self, - REAL_t[:, ::1] contrib, - panel): - self.evalVector(contrib, panel, ALL) - - cdef panelType getQuadOrder(self, - const REAL_t h1, - const REAL_t h2, - REAL_t d): - raise NotImplementedError() - - cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): - raise NotImplementedError() - - cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex): - raise NotImplementedError() - - cdef void getSimplexCenter(self, - const REAL_t[:, ::1] simplex, - REAL_t[::1] center): - cdef: - INDEX_t i, j - REAL_t fac - center[:] = 0. 
- for i in range(simplex.shape[0]): - for j in range(simplex.shape[1]): - center[j] += simplex[i, j] - fac = 1./simplex.shape[0] - for j in range(simplex.shape[1]): - center[j] *= fac - - cdef panelType getProtoPanelType(self): - # Given two cells, determines their relationship: - # - COMMON_FACE - # - COMMON_EDGE - # - COMMON_VERTEX - # - DISTANT - cdef: - INDEX_t mask1 = 0, mask2 = 0 - INDEX_t numVertices1 = self.cells1.shape[1] - INDEX_t numVertices2 = self.cells2.shape[1] - INDEX_t vertexNo1, vertexNo2, vertex1, vertex2 - INDEX_t commonVertices = 0 - INDEX_t k, i - INDEX_t dofs_per_vertex, dofs_per_edge, dofs_per_face, dofs_per_element = self.DoFMap.dofs_per_element - panelType panel - INDEX_t chosenPermutation - if self.symmetricCells: - if self.cellNo1 > self.cellNo2: - return IGNORED - - if (self.cells1.shape[1] == self.cells2.shape[1]) and (self.cellNo1 == self.cellNo2): - for k in range(numVertices1): - self.perm1[k] = k - for k in range(numVertices2): - self.perm2[k] = k - for k in range(dofs_per_element): - self.perm[k] = k - return self.IDENTICAL - - # now the two simplices can share at most numVertices1-1 vertices - - for vertexNo1 in range(numVertices1): - vertex1 = self.cells1[self.cellNo1, vertexNo1] - for vertexNo2 in range(numVertices2): - if mask2 & (1 << vertexNo2): - continue - vertex2 = self.cells2[self.cellNo2, vertexNo2] - if vertex1 == vertex2: - self.perm1[commonVertices] = vertexNo1 - self.perm2[commonVertices] = vertexNo2 - mask1 += (1 << vertexNo1) - mask2 += (1 << vertexNo2) - commonVertices += 1 - break - - if commonVertices == 0: - for k in range(numVertices1): - self.perm1[k] = k - for k in range(numVertices2): - self.perm2[k] = k - for k in range(dofs_per_element): - self.perm[k] = k - return 0 - else: - i = 0 - for k in range(commonVertices, numVertices1): - while mask1 & (1 << i): - i += 1 - self.perm1[k] = i - mask1 += (1 << i) - - i = 0 - for k in range(commonVertices, numVertices2): - while mask2 & (1 << i): - i += 1 - 
self.perm2[k] = i - mask2 += (1 << i) - - # we now have set permutations for the two simplices - # we have at least one shared vertex - - chosenPermutation = self.pI_volume.rank(self.perm1) - for k in range(dofs_per_element): - self.perm[k] = self.precomputedDoFPermutations[chosenPermutation, k] - - if numVertices1 == numVertices2: - dofs_per_vertex = self.DoFMap.dofs_per_vertex - dofs_per_edge = self.DoFMap.dofs_per_edge - dofs_per_face = self.DoFMap.dofs_per_face - - chosenPermutation = self.pI_volume.rank(self.perm2) - if commonVertices == 1: - for k in range(dofs_per_vertex, dofs_per_element): - self.perm[dofs_per_element+k-dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - elif commonVertices == 2: - for k in range(2*dofs_per_vertex, numVertices2*dofs_per_vertex): - self.perm[dofs_per_element+k-2*dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - for k in range(numVertices2*dofs_per_vertex+dofs_per_edge, dofs_per_element): - self.perm[dofs_per_element+k-2*dofs_per_vertex-dofs_per_edge] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - elif commonVertices == 3: - # only in 3d - for k in range(3*dofs_per_vertex, numVertices2*dofs_per_vertex): - self.perm[dofs_per_element+k-3*dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - for k in range(numVertices2*dofs_per_vertex+3*dofs_per_edge, numVertices2*dofs_per_vertex+6*dofs_per_edge): - self.perm[dofs_per_element+k-3*dofs_per_vertex-3*dofs_per_edge] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - for k in range(numVertices2*dofs_per_vertex+6*dofs_per_edge+dofs_per_face, dofs_per_element): - self.perm[dofs_per_element+k-3*dofs_per_vertex-3*dofs_per_edge-dofs_per_face] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] - panel = -commonVertices - return panel - - cdef void computeCenterDistance(self): - cdef: - INDEX_t j - 
REAL_t d2 = 0. - for j in range(self.dim): - d2 += (self.center1[j]-self.center2[j])**2 - self.dcenter2 = d2 - - cdef void computeExtremeDistances(self): - cdef: - INDEX_t i, k, j - INDEX_t noSimplex1 = self.simplex1.shape[0] - INDEX_t noSimplex2 = self.simplex2.shape[0] - REAL_t d2 - REAL_t dmin2 = inf - REAL_t dmax2 = 0. - for i in range(noSimplex1): - for k in range(noSimplex2): - d2 = 0. - for j in range(self.dim): - d2 += (self.simplex1[i, j] - self.simplex2[k, j])**2 - dmin2 = min(dmin2, d2) - dmax2 = max(dmax2, d2) - self.dmin2 = dmin2 - self.dmax2 = dmax2 - - cpdef panelType getPanelType(self): - raise NotImplementedError() - - cdef void addQuadRule(self, panelType panel): - raise NotImplementedError() - - def addQuadRule_py(self, panelType panel): - self.addQuadRule(panel) - - def __repr__(self): - return '{}\n'.format(self.__class__.__name__) +include "nonlocalLaplacianBase_REAL.pxi" +include "nonlocalLaplacianBase_COMPLEX.pxi" cdef class specialQuadRule: @@ -511,632 +108,1123 @@ cdef class specialQuadRule: cdef panelType MAX_PANEL = 120 -cdef class nonlocalLaplacian(double_local_matrix_t): - def __init__(self, - Kernel kernel, - meshBase mesh, DoFMap dm, - num_dofs=None, INDEX_t manifold_dim2=-1): - cdef: - shapeFunction sf - INDEX_t i - if manifold_dim2 < 0: - manifold_dim2 = mesh.manifold_dim - double_local_matrix_t.__init__(self, mesh.dim, mesh.manifold_dim, manifold_dim2, dm) - if num_dofs is None: - self.num_dofs = dm.num_dofs - else: - self.num_dofs = num_dofs - self.hmin = mesh.hmin - self.H0 = mesh.diam/sqrt(8) - self.localShapeFunctions = malloc(self.DoFMap.dofs_per_element*sizeof(void*)) - for i in range(self.DoFMap.dofs_per_element): - sf = dm.localShapeFunctions[i] - ((self.localShapeFunctions+i*sizeof(void*)))[0] = sf - self.specialQuadRules = {} - self.distantQuadRulesPtr = malloc(MAX_PANEL*sizeof(void*)) - for i in range(MAX_PANEL): - self.distantQuadRulesPtr[i] = NULL - - self.x = uninitialized((0, self.dim), dtype=REAL) - self.y = 
uninitialized((0, self.dim), dtype=REAL) - self.temp = uninitialized((0), dtype=REAL) - - self.n = uninitialized((self.dim), dtype=REAL) - self.w = uninitialized((self.dim), dtype=REAL) - - self.kernel = kernel - - if self.kernel.variable: - self.symmetricCells = self.kernel.symmetric - self.symmetricLocalMatrix = self.kernel.symmetric - else: - self.symmetricCells = True - self.symmetricLocalMatrix = True - - if self.kernel.variableHorizon: - self.symmetricCells = False - - def __del__(self): - free(self.localShapeFunctions) - free(self.distantQuadRulesPtr) - - cdef void getNearQuadRule(self, panelType panel): - raise NotImplementedError() - - cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): - assert not self.kernel.variable - if self.kernel.finiteHorizon: - self.computeExtremeDistances() - if self.dmax2 <= self.kernel.getHorizonValue2(): - # entirely within horizon - self.computeCellPairIdentifierBase(ID, perm) - elif self.dmin2 >= self.kernel.getHorizonValue2(): - # entirely outside of horizon - ID[0] = IGNORED - else: - # on horizon - ID[0] = ON_HORIZON - else: - self.computeCellPairIdentifierBase(ID, perm) - - cpdef panelType getPanelType(self): - # Given two cells, determines their relationship: - # - COMMON_FACE - # - COMMON_EDGE - # - COMMON_VERTEX - # - DISTANT - # - IGNORED - cdef: - panelType panel - REAL_t d, h1, h2 - REAL_t alpha - panel = self.getProtoPanelType() - - if panel == IGNORED: - return IGNORED - - if self.kernel.variable: - self.kernel.evalParams(self.center1, self.center2) - - if panel == DISTANT: - if self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) == REMOTE: - return IGNORED - - self.computeCenterDistance() - d = sqrt(self.dcenter2) - - if self.symmetricCells: - h1 = self.precomputedH[self.cellNo1] - else: - h1 = self.get_h_simplex(self.simplex1) - if self.cells1.shape[1] == self.cells2.shape[1]: - if self.symmetricCells: - h2 = self.precomputedH[self.cellNo2] - else: - h2 = 
self.get_h_simplex(self.simplex2) - else: - h2 = self.get_h_surface_simplex(self.simplex2) - panel = self.getQuadOrder(h1, h2, d) - elif self.kernel.variable: - self.getNearQuadRule(panel) - if panel < 0: - alpha = self.kernel.getSingularityValue() - if alpha == 0.: - self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) - return panel - - def __repr__(self): - return (super(nonlocalLaplacian, self).__repr__() + - 'kernel: {}\n'.format(self.kernel)) - - cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof): - return ((((self.localShapeFunctions+local_dof*sizeof(void*)))[0])) - - cdef void addQuadRule(self, panelType panel): - cdef: - simplexQuadratureRule qr0, qr1 - doubleSimplexQuadratureRule qr2 - specialQuadRule sQR - REAL_t[:, ::1] PSI - INDEX_t I, k, i, j - INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element - shapeFunction sf - qr0 = simplexXiaoGimbutas(panel, self.dim) - qr1 = qr0 - qr2 = doubleSimplexQuadratureRule(qr0, qr1) - numQuadNodes0 = qr0.num_nodes - numQuadNodes1 = qr1.num_nodes - dofs_per_element = self.DoFMap.dofs_per_element - PSI = uninitialized((2*dofs_per_element, - qr2.num_nodes), dtype=REAL) - # phi_i(x) - phi_i(y) = phi_i(x) - for I in range(self.DoFMap.dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PSI[I, k] = sf(qr0.nodes[:, i]) - k += 1 - # phi_i(x) - phi_i(y) = -phi_i(y) - for I in range(self.DoFMap.dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) - k += 1 - sQR = specialQuadRule(qr2, PSI) - self.distantQuadRules[panel] = sQR - self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) - - if numQuadNodes0 > self.x.shape[0]: - self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) - if numQuadNodes1 > self.y.shape[0]: - self.y = uninitialized((numQuadNodes1, 
self.dim), dtype=REAL) - if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: - self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) - - cdef void addQuadRule_nonSym(self, panelType panel): - cdef: - simplexQuadratureRule qr0, qr1 - doubleSimplexQuadratureRule qr2 - specialQuadRule sQR - REAL_t[:, ::1] PSI - REAL_t[:, :, ::1] PHI - INDEX_t I, k, i, j - INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element - shapeFunction sf - qr0 = simplexXiaoGimbutas(panel, self.dim) - qr1 = qr0 - qr2 = doubleSimplexQuadratureRule(qr0, qr1) - numQuadNodes0 = qr0.num_nodes - numQuadNodes1 = qr1.num_nodes - dofs_per_element = self.DoFMap.dofs_per_element - PSI = uninitialized((2*dofs_per_element, - qr2.num_nodes), dtype=REAL) - PHI = uninitialized((2, - 2*dofs_per_element, - qr2.num_nodes), dtype=REAL) - # phi_i(x) - phi_i(y) = phi_i(x) - for I in range(self.DoFMap.dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PHI[0, I, k] = sf(qr0.nodes[:, i]) - PHI[1, I, k] = 0. - PSI[I, k] = PHI[0, I, k] - k += 1 - # phi_i(x) - phi_i(y) = -phi_i(y) - for I in range(self.DoFMap.dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PHI[0, I+dofs_per_element, k] = 0. 
- PHI[1, I+dofs_per_element, k] = sf(qr1.nodes[:, j]) - PSI[I+dofs_per_element, k] = -PHI[1, I+dofs_per_element, k] - k += 1 - sQR = specialQuadRule(qr2, PSI, PHI3=PHI) - self.distantQuadRules[panel] = sQR - self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) - - if numQuadNodes0 > self.x.shape[0]: - self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) - if numQuadNodes1 > self.y.shape[0]: - self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) - if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: - self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) - self.temp2 = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) - - cdef void getNonSingularNearQuadRule(self, panelType panel): - cdef: - simplexQuadratureRule qr0, qr1 - doubleSimplexQuadratureRule qr2 - specialQuadRule sQR - REAL_t[:, ::1] PSI - INDEX_t I, k, i, j - INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element - shapeFunction sf - try: - sQR = (self.distantQuadRules[MAX_PANEL+panel]) - except KeyError: - quadOrder = max(ceil(self.target_order), 2) - qr0 = simplexXiaoGimbutas(quadOrder, self.dim) - qr1 = qr0 - qr2 = doubleSimplexQuadratureRule(qr0, qr1) - numQuadNodes0 = qr0.num_nodes - numQuadNodes1 = qr1.num_nodes - dofs_per_element = self.DoFMap.dofs_per_element - PSI = uninitialized((2*dofs_per_element, - qr2.num_nodes), - dtype=REAL) - - # phi_i(x) - phi_i(y) = phi_i(x) - for I in range(dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PSI[I, k] = sf(qr0.nodes[:, i]) - k += 1 - # phi_i(x) - phi_i(y) = -phi_i(y) - for I in range(dofs_per_element): - sf = self.getLocalShapeFunction(I) - k = 0 - for i in range(numQuadNodes0): - for j in range(numQuadNodes1): - PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) - k += 1 - sQR = specialQuadRule(qr2, PSI) - self.distantQuadRules[MAX_PANEL+panel] = sQR - self.distantQuadRulesPtr[MAX_PANEL+panel] = 
(self.distantQuadRules[MAX_PANEL+panel]) - if numQuadNodes0 > self.x.shape[0]: - self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) - if numQuadNodes1 > self.y.shape[0]: - self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) - if qr2.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr2.num_nodes), dtype=REAL) - - cdef void eval_distant(self, - REAL_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, i, j, I, J - REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 - doubleSimplexQuadratureRule qr2 - REAL_t[:, ::1] PSI - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - INDEX_t dim = simplex1.shape[1] - REAL_t c1, c2, PSI_I, PSI_J - transformQuadratureRule qr0trans, qr1trans - INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 - REAL_t a_b1[3] - REAL_t a_A1[3][3] - REAL_t a_A2[3][3] - REAL_t[::1] b1 - REAL_t[:, ::1] A1, A2 - BOOL_t cutElements = False - - if self.kernel.finiteHorizon: - # check if the horizon might cut the elements - if self.kernel.interaction.relPos == CUT: - cutElements = True - if self.kernel.complement: - cutElements = False - # TODO: cutElements should be set to True, but - # need to figure out the element - # transformation. - - contrib[:] = 0. - - if not cutElements: - vol = vol1*vol2 - if panel < 0: - sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) - else: - sQR = (self.distantQuadRulesPtr[panel]) - qr2 = (sQR.qr) - PSI = sQR.PSI - qr2.rule1.nodesInGlobalCoords(simplex1, self.x) - qr2.rule2.nodesInGlobalCoords(simplex2, self.y) - - k = 0 - for i in range(qr2.rule1.num_nodes): - for j in range(qr2.rule2.num_nodes): - self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(dim, - &self.x[i, 0], - &self.y[j, 0]) - k += 1 - - k = 0 - for I in range(2*self.DoFMap.dofs_per_element): - for J in range(I, 2*self.DoFMap.dofs_per_element): - if mask[k]: - val = 0. 
- for i in range(qr2.num_nodes): - val += self.temp[i] * PSI[I, i] * PSI[J, i] - contrib[k] = val*vol - k += 1 - else: - if panel < 0: - sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) - else: - sQR = (self.distantQuadRulesPtr[panel]) - qr2 = (sQR.qr) - if sQR.qrTransformed0 is not None: - qr0trans = sQR.qrTransformed0 - else: - qr0 = qr2.rule1 - qr0trans = transformQuadratureRule(qr0) - sQR.qrTransformed0 = qr0trans - if sQR.qrTransformed1 is not None: - qr1trans = sQR.qrTransformed1 - else: - qr1 = qr2.rule2 - qr1trans = transformQuadratureRule(qr1) - sQR.qrTransformed1 = qr1trans - - numQuadNodes0 = qr0trans.num_nodes - numQuadNodes1 = qr1trans.num_nodes - - vol = vol1*vol2 - dofs_per_element = self.DoFMap.dofs_per_element - - A1 = a_A1 - b1 = a_b1 - A2 = a_A2 - - self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) - while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): - qr0trans.setAffineBaryTransform(A1, b1) - qr0trans.nodesInGlobalCoords(simplex1, self.x) - for i in range(qr0trans.num_nodes): - self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) - while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): - qr1trans.setLinearBaryTransform(A2) - qr1trans.nodesInGlobalCoords(simplex2, self.y) - for j in range(qr1trans.num_nodes): - val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) - val *= c1 * c2 * vol - k = 0 - for I in range(2*dofs_per_element): - if I < dofs_per_element: - PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) - else: - PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) - for J in range(I, 2*dofs_per_element): - if mask[k]: - if J < dofs_per_element: - PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) - else: - PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], 
numQuadNodes1) - contrib[k] += val * PSI_I*PSI_J - k += 1 - - - cdef void eval_distant_nonsym(self, - REAL_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, i, j, I, J - REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 - doubleSimplexQuadratureRule qr2 - REAL_t[:, ::1] PSI - REAL_t[:, :, ::1] PHI - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - INDEX_t dim = simplex1.shape[1] - BOOL_t cutElements = False - REAL_t w - REAL_t c1, c2, val2, PHI_I_0, PHI_I_1, PSI_J - transformQuadratureRule qr0trans, qr1trans - INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 - REAL_t a_b1[3] - REAL_t a_A1[3][3] - REAL_t a_A2[3][3] - REAL_t[::1] b1 - REAL_t[:, ::1] A1, A2 - - if self.kernel.finiteHorizon: - # check if the horizon might cut the elements - if self.kernel.interaction.relPos == CUT: - cutElements = True - if self.kernel.complement: - cutElements = False - # TODO: cutElements should be set to True, but - # need to figure out the element - # transformation. - - contrib[:] = 0. - - if not cutElements: - vol = vol1*vol2 - if panel < 0: - sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) - else: - sQR = (self.distantQuadRulesPtr[panel]) - qr2 = (sQR.qr) - PSI = sQR.PSI - PHI = sQR.PHI3 - qr2.rule1.nodesInGlobalCoords(simplex1, self.x) - qr2.rule2.nodesInGlobalCoords(simplex2, self.y) - - k = 0 - for i in range(qr2.rule1.num_nodes): - for j in range(qr2.rule2.num_nodes): - w = qr2.weights[k] - self.temp[k] = w * self.kernel.evalPtr(dim, - &self.x[i, 0], - &self.y[j, 0]) - self.temp2[k] = w * self.kernel.evalPtr(dim, - &self.y[j, 0], - &self.x[i, 0]) - k += 1 - - k = 0 - for I in range(2*self.DoFMap.dofs_per_element): - for J in range(2*self.DoFMap.dofs_per_element): - if mask[k]: - val = 0. 
- for i in range(qr2.num_nodes): - val += (self.temp[i] * PHI[0, I, i] - self.temp2[i] * PHI[1, I, i]) * PSI[J, i] - contrib[k] = val*vol - k += 1 - else: - if panel < 0: - sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) - else: - sQR = (self.distantQuadRulesPtr[panel]) - qr2 = (sQR.qr) - if sQR.qrTransformed0 is not None: - qr0trans = sQR.qrTransformed0 - else: - qr0 = qr2.rule1 - qr0trans = transformQuadratureRule(qr0) - sQR.qrTransformed0 = qr0trans - if sQR.qrTransformed1 is not None: - qr1trans = sQR.qrTransformed1 - else: - qr1 = qr2.rule2 - qr1trans = transformQuadratureRule(qr1) - sQR.qrTransformed1 = qr1trans - - numQuadNodes0 = qr0trans.num_nodes - numQuadNodes1 = qr1trans.num_nodes - - vol = vol1*vol2 - dofs_per_element = self.DoFMap.dofs_per_element - - A1 = a_A1 - b1 = a_b1 - A2 = a_A2 - - self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) - while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): - qr0trans.setAffineBaryTransform(A1, b1) - qr0trans.nodesInGlobalCoords(simplex1, self.x) - for i in range(qr0trans.num_nodes): - self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) - while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): - qr1trans.setLinearBaryTransform(A2) - qr1trans.nodesInGlobalCoords(simplex2, self.y) - for j in range(qr1trans.num_nodes): - w = qr0trans.weights[i]*qr1trans.weights[j]*c1 * c2 * vol - val = w*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) - val2 = w*self.kernel.evalPtr(dim, &self.y[j, 0], &self.x[i, 0]) - k = 0 - for I in range(2*dofs_per_element): - if I < dofs_per_element: - PHI_I_0 = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) - PHI_I_1 = 0. - else: - PHI_I_0 = 0. 
- PHI_I_1 = self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) - for J in range(2*dofs_per_element): - if mask[k]: - if J < dofs_per_element: - PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) - else: - PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) - contrib[k] += (val * PHI_I_0 - val2 * PHI_I_1) * PSI_J - k += 1 - - cdef void addQuadRule_boundary(self, panelType panel): - cdef: - simplexQuadratureRule qr0, qr1 - doubleSimplexQuadratureRule qr2 - specialQuadRule sQR - REAL_t[:, ::1] PHI - INDEX_t i, j, k, l - shapeFunction sf - qr0 = simplexXiaoGimbutas(panel, self.dim) - qr1 = simplexDuffyTransformation(panel, self.dim, self.dim-1) - qr2 = doubleSimplexQuadratureRule(qr0, qr1) - PHI = uninitialized((self.DoFMap.dofs_per_element, qr2.num_nodes), dtype=REAL) - for i in range(self.DoFMap.dofs_per_element): - sf = self.getLocalShapeFunction(i) - for j in range(qr2.rule1.num_nodes): - for k in range(qr2.rule2.num_nodes): - l = j*qr2.rule2.num_nodes+k - PHI[i, l] = sf(qr2.rule1.nodes[:, j]) - sQR = specialQuadRule(qr2, PHI=PHI) - self.distantQuadRules[panel] = sQR - self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) - - if qr2.rule1.num_nodes > self.x.shape[0]: - self.x = uninitialized((qr2.rule1.num_nodes, self.dim), dtype=REAL) - if qr2.rule2.num_nodes > self.y.shape[0]: - self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) - if qr2.num_nodes > self.temp.shape[0]: - self.temp = uninitialized((qr2.num_nodes), dtype=REAL) - - cdef void eval_distant_boundary(self, - REAL_t[::1] contrib, - panelType panel, - MASK_t mask=ALL): - cdef: - INDEX_t k, m, i, j, I, J - REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 - doubleSimplexQuadratureRule qr2 - REAL_t[:, ::1] PHI - REAL_t[:, ::1] simplex1 = self.simplex1 - REAL_t[:, ::1] simplex2 = self.simplex2 - INDEX_t dim = simplex1.shape[1] - REAL_t normW, 
nw - - # Kernel: - # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} - # with inward normal n. - # - # Rewrite as - # \Gamma(x,y) = [ n \dot (x-y)/|x-y| ] * [ C(d,s) / (2s) / |x-y|^{d-1+2s} ] - # \--------------------------------/ - # | - # boundaryKernel - # - # n is independent of x and y - if dim == 2: - self.n[0] = simplex2[1, 1] - simplex2[0, 1] - self.n[1] = simplex2[0, 0] - simplex2[1, 0] - # F is same as vol2 - val = 1./sqrt(mydot(self.n, self.n)) - self.n[0] *= val - self.n[1] *= val - elif dim == 3: - for j in range(dim): - self.x[0, j] = simplex2[1, j]-simplex2[0, j] - for j in range(dim): - self.x[1, j] = simplex2[2, j]-simplex2[0, j] - self.n[0] = self.x[0, 1]*self.x[1, 2]-self.x[0, 2]*self.x[1, 1] - self.n[1] = self.x[0, 2]*self.x[1, 0]-self.x[0, 0]*self.x[1, 2] - self.n[2] = self.x[0, 0]*self.x[1, 1]-self.x[0, 1]*self.x[1, 0] - val = 1./sqrt(mydot(self.n, self.n)) - self.n[0] *= val - self.n[1] *= val - self.n[2] *= val - - contrib[:] = 0. - - vol = vol1*vol2 - if panel < 0: - sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) - else: - sQR = (self.distantQuadRulesPtr[panel]) - qr2 = (sQR.qr) - PHI = sQR.PHI - qr2.rule1.nodesInGlobalCoords(simplex1, self.x) - qr2.rule2.nodesInGlobalCoords(simplex2, self.y) - - for k in range(qr2.rule1.num_nodes): - for m in range(qr2.rule2.num_nodes): - if dim == 1: - nw = 1. - else: - normW = 0. - for j in range(dim): - self.w[j] = self.y[m, j]-self.x[k, j] - normW += self.w[j]**2 - normW = 1./sqrt(normW) - for j in range(dim): - self.w[j] *= normW - nw = mydot(self.n, self.w) - i = k*qr2.rule2.num_nodes+m - self.temp[i] = qr2.weights[i] * nw * self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0]) - - k = 0 - for I in range(self.DoFMap.dofs_per_element): - for J in range(I, self.DoFMap.dofs_per_element): - if mask[k]: - val = 0. 
- for i in range(qr2.num_nodes): - val += self.temp[i] * PHI[I, i] * PHI[J, i] - contrib[k] = val*vol - k += 1 - +# cdef class nonlocalLaplacian(double_local_matrix_t): +# def __init__(self, +# Kernel kernel, +# meshBase mesh, DoFMap dm, +# num_dofs=None, INDEX_t manifold_dim2=-1): +# cdef: +# shapeFunction sf +# INDEX_t i +# if manifold_dim2 < 0: +# manifold_dim2 = mesh.manifold_dim +# double_local_matrix_t.__init__(self, mesh.dim, mesh.manifold_dim, manifold_dim2, dm) +# if num_dofs is None: +# self.num_dofs = dm.num_dofs +# else: +# self.num_dofs = num_dofs +# self.hmin = mesh.hmin +# self.H0 = mesh.diam/sqrt(8) +# self.localShapeFunctions = malloc(self.DoFMap.dofs_per_element*sizeof(void*)) +# for i in range(self.DoFMap.dofs_per_element): +# sf = dm.localShapeFunctions[i] +# ((self.localShapeFunctions+i*sizeof(void*)))[0] = sf +# self.specialQuadRules = {} +# self.distantQuadRulesPtr = malloc(MAX_PANEL*sizeof(void*)) +# for i in range(MAX_PANEL): +# self.distantQuadRulesPtr[i] = NULL + +# self.x = uninitialized((0, self.dim), dtype=REAL) +# self.y = uninitialized((0, self.dim), dtype=REAL) +# self.temp = uninitialized((0), dtype=REAL) +# # private +# self.vec = uninitialized((kernel.vectorSize), dtype=REAL) +# self.vec2 = uninitialized((kernel.vectorSize), dtype=REAL) +# self.tempVector = uninitialized((0, kernel.vectorSize), dtype=REAL) +# # release + +# self.n = uninitialized((self.dim), dtype=REAL) +# self.w = uninitialized((self.dim), dtype=REAL) + +# self.kernel = kernel + +# if self.kernel.variable: +# self.symmetricCells = self.kernel.symmetric +# self.symmetricLocalMatrix = self.kernel.symmetric +# else: +# self.symmetricCells = True +# self.symmetricLocalMatrix = True + +# if self.kernel.variableHorizon: +# self.symmetricCells = False + +# def __del__(self): +# free(self.localShapeFunctions) +# free(self.distantQuadRulesPtr) + +# cdef void getNearQuadRule(self, panelType panel): +# raise NotImplementedError() + +# cdef void 
computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): +# assert not self.kernel.variable +# if self.kernel.finiteHorizon: +# self.computeExtremeDistances() +# if self.dmax2 <= self.kernel.getHorizonValue2(): +# # entirely within horizon +# self.computeCellPairIdentifierBase(ID, perm) +# elif self.dmin2 >= self.kernel.getHorizonValue2(): +# # entirely outside of horizon +# ID[0] = IGNORED +# else: +# # on horizon +# ID[0] = ON_HORIZON +# else: +# self.computeCellPairIdentifierBase(ID, perm) + +# cpdef panelType getPanelType(self): +# # Given two cells, determines their relationship: +# # - COMMON_FACE +# # - COMMON_EDGE +# # - COMMON_VERTEX +# # - DISTANT +# # - IGNORED +# cdef: +# panelType panel +# REAL_t d, h1, h2 +# REAL_t alpha +# panel = self.getProtoPanelType() + +# if panel == IGNORED: +# return IGNORED + +# if self.kernel.variable: +# if self.kernel.piecewise: +# self.kernel.evalParams(self.center1, self.center2) +# else: +# self.kernel.evalParamsOnSimplices(self.center1, self.center2, self.simplex1, self.simplex2) + +# if panel == DISTANT: +# if self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) == REMOTE: +# return IGNORED + +# self.computeCenterDistance() +# d = sqrt(self.dcenter2) + +# if self.symmetricCells: +# h1 = self.precomputedH[self.cellNo1] +# else: +# h1 = self.get_h_simplex(self.simplex1) +# if self.cells1.shape[1] == self.cells2.shape[1]: +# if self.symmetricCells: +# h2 = self.precomputedH[self.cellNo2] +# else: +# h2 = self.get_h_simplex(self.simplex2) +# else: +# h2 = self.get_h_surface_simplex(self.simplex2) +# panel = self.getQuadOrder(h1, h2, d) +# elif self.kernel.variable: +# self.getNearQuadRule(panel) +# if panel < 0: +# alpha = self.kernel.getSingularityValue() +# if alpha == 0.: +# self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) +# return panel + +# def __repr__(self): +# return (super(nonlocalLaplacian, self).__repr__() + +# 'kernel: {}\n'.format(self.kernel)) + +# cdef 
inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof): +# return ((((self.localShapeFunctions+local_dof*sizeof(void*)))[0])) + +# cdef void addQuadRule(self, panelType panel): +# cdef: +# simplexQuadratureRule qr0, qr1 +# doubleSimplexQuadratureRule qr2 +# specialQuadRule sQR +# REAL_t[:, ::1] PSI +# INDEX_t I, k, i, j +# INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element +# shapeFunction sf +# qr0 = simplexXiaoGimbutas(panel, self.dim, self.manifold_dim1) +# qr1 = qr0 +# qr2 = doubleSimplexQuadratureRule(qr0, qr1) +# numQuadNodes0 = qr0.num_nodes +# numQuadNodes1 = qr1.num_nodes +# dofs_per_element = self.DoFMap.dofs_per_element +# PSI = uninitialized((2*dofs_per_element, +# qr2.num_nodes), dtype=REAL) +# # phi_i(x) - phi_i(y) = phi_i(x) +# for I in range(self.DoFMap.dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PSI[I, k] = sf(qr0.nodes[:, i]) +# k += 1 +# # phi_i(x) - phi_i(y) = -phi_i(y) +# for I in range(self.DoFMap.dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) +# k += 1 +# sQR = specialQuadRule(qr2, PSI) +# self.distantQuadRules[panel] = sQR +# self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + +# if numQuadNodes0 > self.x.shape[0]: +# self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) +# if numQuadNodes1 > self.y.shape[0]: +# self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) +# if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: +# self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) +# # private +# self.tempVector = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.vectorSize), dtype=REAL) +# # release + +# cdef void addQuadRule_nonSym(self, panelType panel): +# cdef: +# simplexQuadratureRule qr0, qr1 +# doubleSimplexQuadratureRule qr2 +# 
specialQuadRule sQR +# REAL_t[:, ::1] PSI +# REAL_t[:, :, ::1] PHI +# INDEX_t I, k, i, j +# INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element +# shapeFunction sf +# qr0 = simplexXiaoGimbutas(panel, self.dim, self.manifold_dim1) +# qr1 = qr0 +# qr2 = doubleSimplexQuadratureRule(qr0, qr1) +# numQuadNodes0 = qr0.num_nodes +# numQuadNodes1 = qr1.num_nodes +# dofs_per_element = self.DoFMap.dofs_per_element +# PSI = uninitialized((2*dofs_per_element, +# qr2.num_nodes), dtype=REAL) +# PHI = uninitialized((2, +# 2*dofs_per_element, +# qr2.num_nodes), dtype=REAL) +# # phi_i(x) - phi_i(y) = phi_i(x) +# for I in range(self.DoFMap.dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PHI[0, I, k] = sf(qr0.nodes[:, i]) +# PHI[1, I, k] = 0. +# PSI[I, k] = PHI[0, I, k] +# k += 1 +# # phi_i(x) - phi_i(y) = -phi_i(y) +# for I in range(self.DoFMap.dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PHI[0, I+dofs_per_element, k] = 0. 
+# PHI[1, I+dofs_per_element, k] = sf(qr1.nodes[:, j]) +# PSI[I+dofs_per_element, k] = -PHI[1, I+dofs_per_element, k] +# k += 1 +# sQR = specialQuadRule(qr2, PSI, PHI3=PHI) +# self.distantQuadRules[panel] = sQR +# self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + +# if numQuadNodes0 > self.x.shape[0]: +# self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) +# if numQuadNodes1 > self.y.shape[0]: +# self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) +# if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: +# self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) +# self.temp2 = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) +# # private +# self.tempVector = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.vectorSize), dtype=REAL) +# self.tempVector2 = uninitialized((numQuadNodes0*numQuadNodes1, self.kernel.vectorSize), dtype=REAL) +# # release + +# cdef void getNonSingularNearQuadRule(self, panelType panel): +# cdef: +# simplexQuadratureRule qr0, qr1 +# doubleSimplexQuadratureRule qr2 +# specialQuadRule sQR +# REAL_t[:, ::1] PSI +# INDEX_t I, k, i, j +# INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element +# shapeFunction sf +# try: +# sQR = (self.distantQuadRules[MAX_PANEL+panel]) +# except KeyError: +# quadOrder = max(ceil(self.target_order), 2) +# qr0 = simplexXiaoGimbutas(quadOrder, self.dim) +# qr1 = qr0 +# qr2 = doubleSimplexQuadratureRule(qr0, qr1) +# numQuadNodes0 = qr0.num_nodes +# numQuadNodes1 = qr1.num_nodes +# dofs_per_element = self.DoFMap.dofs_per_element +# PSI = uninitialized((2*dofs_per_element, +# qr2.num_nodes), +# dtype=REAL) + +# # phi_i(x) - phi_i(y) = phi_i(x) +# for I in range(dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PSI[I, k] = sf(qr0.nodes[:, i]) +# k += 1 +# # phi_i(x) - phi_i(y) = -phi_i(y) +# for I in range(dofs_per_element): +# sf = self.getLocalShapeFunction(I) +# k = 0 +# 
for i in range(numQuadNodes0): +# for j in range(numQuadNodes1): +# PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) +# k += 1 +# sQR = specialQuadRule(qr2, PSI) +# self.distantQuadRules[MAX_PANEL+panel] = sQR +# self.distantQuadRulesPtr[MAX_PANEL+panel] = (self.distantQuadRules[MAX_PANEL+panel]) +# if numQuadNodes0 > self.x.shape[0]: +# self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) +# if numQuadNodes1 > self.y.shape[0]: +# self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) +# if qr2.num_nodes > self.temp.shape[0]: +# self.temp = uninitialized((qr2.num_nodes), dtype=REAL) +# # private +# self.tempVector = uninitialized((qr2.num_nodes, self.kernel.vectorSize), dtype=REAL) +# # release + +# cdef void eval_distant(self, +# REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, i, j, I, J +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PSI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# REAL_t c1, c2, PSI_I, PSI_J +# transformQuadratureRule qr0trans, qr1trans +# INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 +# REAL_t a_b1[3] +# REAL_t a_A1[3][3] +# REAL_t a_A2[3][3] +# REAL_t[::1] b1 +# REAL_t[:, ::1] A1, A2 +# BOOL_t cutElements = False + +# if self.kernel.finiteHorizon: +# # check if the horizon might cut the elements +# if self.kernel.interaction.relPos == CUT: +# cutElements = True +# if self.kernel.complement: +# cutElements = False +# # TODO: cutElements should be set to True, but +# # need to figure out the element +# # transformation. + +# contrib[:] = 0. 
+ +# if not cutElements: +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PSI = sQR.PSI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# k = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(dim, +# &self.x[i, 0], +# &self.y[j, 0]) +# k += 1 + +# k = 0 +# for I in range(2*self.DoFMap.dofs_per_element): +# for J in range(I, 2*self.DoFMap.dofs_per_element): +# if mask[k]: +# val = 0. +# for i in range(qr2.num_nodes): +# val += self.temp[i] * PSI[I, i] * PSI[J, i] +# contrib[k] = val*vol +# k += 1 +# else: +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# if sQR.qrTransformed0 is not None: +# qr0trans = sQR.qrTransformed0 +# else: +# qr0 = qr2.rule1 +# qr0trans = transformQuadratureRule(qr0) +# sQR.qrTransformed0 = qr0trans +# if sQR.qrTransformed1 is not None: +# qr1trans = sQR.qrTransformed1 +# else: +# qr1 = qr2.rule2 +# qr1trans = transformQuadratureRule(qr1) +# sQR.qrTransformed1 = qr1trans + +# numQuadNodes0 = qr0trans.num_nodes +# numQuadNodes1 = qr1trans.num_nodes + +# vol = vol1*vol2 +# dofs_per_element = self.DoFMap.dofs_per_element + +# A1 = a_A1 +# b1 = a_b1 +# A2 = a_A2 + +# self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) +# while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): +# qr0trans.setAffineBaryTransform(A1, b1) +# qr0trans.nodesInGlobalCoords(simplex1, self.x) +# for i in range(qr0trans.num_nodes): +# self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) +# while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): +# qr1trans.setLinearBaryTransform(A2) +# qr1trans.nodesInGlobalCoords(simplex2, self.y) +# for j in range(qr1trans.num_nodes): +# 
val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) +# val *= c1 * c2 * vol +# k = 0 +# for I in range(2*dofs_per_element): +# if I < dofs_per_element: +# PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# else: +# PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# for J in range(I, 2*dofs_per_element): +# if mask[k]: +# if J < dofs_per_element: +# PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# else: +# PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# contrib[k] += val * PSI_I*PSI_J +# k += 1 + + +# cdef void eval_distant_nonsym(self, +# REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, i, j, I, J +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PSI +# REAL_t[:, :, ::1] PHI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# BOOL_t cutElements = False +# REAL_t w +# REAL_t c1, c2, val2, PHI_I_0, PHI_I_1, PSI_J +# transformQuadratureRule qr0trans, qr1trans +# INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 +# REAL_t a_b1[3] +# REAL_t a_A1[3][3] +# REAL_t a_A2[3][3] +# REAL_t[::1] b1 +# REAL_t[:, ::1] A1, A2 + +# if self.kernel.finiteHorizon: +# # check if the horizon might cut the elements +# if self.kernel.interaction.relPos == CUT: +# cutElements = True +# if self.kernel.complement: +# cutElements = False +# # TODO: cutElements should be set to True, but +# # need to figure out the element +# # transformation. + +# contrib[:] = 0. 
+ +# if not cutElements: +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PSI = sQR.PSI +# PHI = sQR.PHI3 +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# k = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# w = qr2.weights[k] +# self.temp[k] = w * self.kernel.evalPtr(dim, +# &self.x[i, 0], +# &self.y[j, 0]) +# self.temp2[k] = w * self.kernel.evalPtr(dim, +# &self.y[j, 0], +# &self.x[i, 0]) +# k += 1 + +# k = 0 +# for I in range(2*self.DoFMap.dofs_per_element): +# for J in range(2*self.DoFMap.dofs_per_element): +# if mask[k]: +# val = 0. +# for i in range(qr2.num_nodes): +# val += (self.temp[i] * PHI[0, I, i] - self.temp2[i] * PHI[1, I, i]) * PSI[J, i] +# contrib[k] = val*vol +# k += 1 +# else: +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# if sQR.qrTransformed0 is not None: +# qr0trans = sQR.qrTransformed0 +# else: +# qr0 = qr2.rule1 +# qr0trans = transformQuadratureRule(qr0) +# sQR.qrTransformed0 = qr0trans +# if sQR.qrTransformed1 is not None: +# qr1trans = sQR.qrTransformed1 +# else: +# qr1 = qr2.rule2 +# qr1trans = transformQuadratureRule(qr1) +# sQR.qrTransformed1 = qr1trans + +# numQuadNodes0 = qr0trans.num_nodes +# numQuadNodes1 = qr1trans.num_nodes + +# vol = vol1*vol2 +# dofs_per_element = self.DoFMap.dofs_per_element + +# A1 = a_A1 +# b1 = a_b1 +# A2 = a_A2 + +# self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) +# while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): +# qr0trans.setAffineBaryTransform(A1, b1) +# qr0trans.nodesInGlobalCoords(simplex1, self.x) +# for i in range(qr0trans.num_nodes): +# self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) +# while 
self.kernel.interaction.nextSubSimplex_Node(A2, &c2): +# qr1trans.setLinearBaryTransform(A2) +# qr1trans.nodesInGlobalCoords(simplex2, self.y) +# for j in range(qr1trans.num_nodes): +# w = qr0trans.weights[i]*qr1trans.weights[j]*c1 * c2 * vol +# val = w*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) +# val2 = w*self.kernel.evalPtr(dim, &self.y[j, 0], &self.x[i, 0]) +# k = 0 +# for I in range(2*dofs_per_element): +# if I < dofs_per_element: +# PHI_I_0 = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# PHI_I_1 = 0. +# else: +# PHI_I_0 = 0. +# PHI_I_1 = self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# for J in range(2*dofs_per_element): +# if mask[k]: +# if J < dofs_per_element: +# PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# else: +# PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# contrib[k] += (val * PHI_I_0 - val2 * PHI_I_1) * PSI_J +# k += 1 + +# cdef void addQuadRule_boundary(self, panelType panel): +# cdef: +# simplexQuadratureRule qr0, qr1 +# doubleSimplexQuadratureRule qr2 +# specialQuadRule sQR +# REAL_t[:, ::1] PHI +# INDEX_t i, j, k, l +# shapeFunction sf +# qr0 = simplexXiaoGimbutas(panel, self.dim) +# qr1 = simplexDuffyTransformation(panel, self.dim, self.dim-1) +# qr2 = doubleSimplexQuadratureRule(qr0, qr1) +# PHI = uninitialized((self.DoFMap.dofs_per_element, qr2.num_nodes), dtype=REAL) +# for i in range(self.DoFMap.dofs_per_element): +# sf = self.getLocalShapeFunction(i) +# for j in range(qr2.rule1.num_nodes): +# for k in range(qr2.rule2.num_nodes): +# l = j*qr2.rule2.num_nodes+k +# PHI[i, l] = sf(qr2.rule1.nodes[:, j]) +# sQR = specialQuadRule(qr2, PHI=PHI) +# self.distantQuadRules[panel] = sQR +# self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + +# if qr2.rule1.num_nodes > self.x.shape[0]: +# self.x = uninitialized((qr2.rule1.num_nodes, 
self.dim), dtype=REAL) +# if qr2.rule2.num_nodes > self.y.shape[0]: +# self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) +# if qr2.num_nodes > self.temp.shape[0]: +# self.temp = uninitialized((qr2.num_nodes), dtype=REAL) +# # private +# self.tempVector = uninitialized((qr2.num_nodes, self.kernel.vectorSize), dtype=REAL) +# # release + +# cdef void eval_distant_boundary(self, +# REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, m, i, j, I, J +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PHI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# REAL_t normW, nw + +# # Kernel: +# # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} +# # with inward normal n. +# # +# # Rewrite as +# # \Gamma(x,y) = [ n \dot (x-y)/|x-y| ] * [ C(d,s) / (2s) / |x-y|^{d-1+2s} ] +# # \--------------------------------/ +# # | +# # boundaryKernel +# # +# # n is independent of x and y +# if dim == 2: +# self.n[0] = simplex2[1, 1] - simplex2[0, 1] +# self.n[1] = simplex2[0, 0] - simplex2[1, 0] +# # F is same as vol2 +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# elif dim == 3: +# for j in range(dim): +# self.x[0, j] = simplex2[1, j]-simplex2[0, j] +# for j in range(dim): +# self.x[1, j] = simplex2[2, j]-simplex2[0, j] +# self.n[0] = self.x[0, 1]*self.x[1, 2]-self.x[0, 2]*self.x[1, 1] +# self.n[1] = self.x[0, 2]*self.x[1, 0]-self.x[0, 0]*self.x[1, 2] +# self.n[2] = self.x[0, 0]*self.x[1, 1]-self.x[0, 1]*self.x[1, 0] +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# self.n[2] *= val + +# contrib[:] = 0. 
+ +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PHI = sQR.PHI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# for k in range(qr2.rule1.num_nodes): +# for m in range(qr2.rule2.num_nodes): +# if dim == 1: +# nw = 1. +# else: +# normW = 0. +# for j in range(dim): +# self.w[j] = self.y[m, j]-self.x[k, j] +# normW += self.w[j]**2 +# normW = 1./sqrt(normW) +# for j in range(dim): +# self.w[j] *= normW +# nw = mydot(self.n, self.w) +# i = k*qr2.rule2.num_nodes+m +# self.temp[i] = qr2.weights[i] * nw * self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0]) + +# k = 0 +# for I in range(self.DoFMap.dofs_per_element): +# for J in range(I, self.DoFMap.dofs_per_element): +# if mask[k]: +# val = 0. +# for i in range(qr2.num_nodes): +# val += self.temp[i] * PHI[I, i] * PHI[J, i] +# contrib[k] = val*vol +# k += 1 + +# # private +# cdef void eval_distant_tensor(self, +# REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, i, j, I, J, m, p, q, start +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2, dist2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PSI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# REAL_t c1, c2, PSI_I, PSI_J +# transformQuadratureRule qr0trans, qr1trans +# INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 +# REAL_t a_b1[3] +# REAL_t a_A1[3][3] +# REAL_t a_A2[3][3] +# REAL_t[::1] b1 +# REAL_t[:, ::1] A1, A2 +# BOOL_t cutElements = False + +# if self.kernel.finiteHorizon: +# # check if the horizon might cut the elements +# if self.kernel.interaction.relPos == CUT: +# cutElements = True +# if self.kernel.complement: +# cutElements = False +# # TODO: cutElements should be set to True, but +# # need to figure out the element +# # transformation. + +# contrib[:] = 0. 
+ +# if not cutElements: +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PSI = sQR.PSI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# m = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# dist2 = 0. +# for p in range(dim): +# dist2 += (self.x[i, p]-self.y[j, p])*(self.x[i, p]-self.y[j, p]) +# self.temp[m] = qr2.weights[m]*self.kernel.evalPtr(dim, +# &self.x[i, 0], +# &self.y[j, 0]) / dist2 +# m += 1 + +# k = 0 +# for I in range(2*self.DoFMap.dofs_per_element): +# for p in range(dim): +# for J in range(I, 2*self.DoFMap.dofs_per_element): +# if I == J: +# start = p +# else: +# start = 0 +# for q in range(start, dim): +# if mask[k]: +# val = 0. +# m = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# val += self.temp[m] * (self.x[i, p]-self.y[j, p]) * PSI[I, m] * (self.x[i, q]-self.y[j, q]) * PSI[J, m] +# m += 1 + +# contrib[k] = val*vol +# k += 1 +# else: +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# if sQR.qrTransformed0 is not None: +# qr0trans = sQR.qrTransformed0 +# else: +# qr0 = qr2.rule1 +# qr0trans = transformQuadratureRule(qr0) +# sQR.qrTransformed0 = qr0trans +# if sQR.qrTransformed1 is not None: +# qr1trans = sQR.qrTransformed1 +# else: +# qr1 = qr2.rule2 +# qr1trans = transformQuadratureRule(qr1) +# sQR.qrTransformed1 = qr1trans + +# numQuadNodes0 = qr0trans.num_nodes +# numQuadNodes1 = qr1trans.num_nodes + +# vol = vol1*vol2 +# dofs_per_element = self.DoFMap.dofs_per_element + +# A1 = a_A1 +# b1 = a_b1 +# A2 = a_A2 + +# self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) +# while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): +# qr0trans.setAffineBaryTransform(A1, b1) +# 
qr0trans.nodesInGlobalCoords(simplex1, self.x) +# for i in range(qr0trans.num_nodes): +# self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) +# while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): +# qr1trans.setLinearBaryTransform(A2) +# qr1trans.nodesInGlobalCoords(simplex2, self.y) +# for j in range(qr1trans.num_nodes): +# dist2 = 0. +# for p in range(dim): +# dist2 += (self.x[i, p]-self.y[j, p])*(self.x[i, p]-self.y[j, p]) +# val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) / dist2 +# val *= c1 * c2 * vol +# k = 0 +# for I in range(2*dofs_per_element): +# for p in range(dim): +# if I < dofs_per_element: +# PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# else: +# PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# for J in range(I, 2*dofs_per_element): +# if I == J: +# start = p +# else: +# start = 0 +# for q in range(start, dim): +# if mask[k]: +# if J < dofs_per_element: +# PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) +# else: +# PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) +# contrib[k] += val * (self.x[i, p]-self.y[j, p])*(self.x[i, q]-self.y[j, q]) * PSI_I*PSI_J +# k += 1 + +# cdef void eval_distant_tensor_boundary(self, +# REAL_t[::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, m, i, j, I, J +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PHI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# REAL_t normW, nw + +# # Kernel: +# # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} +# # with inward normal n. 
+# # +# # Rewrite as +# # \Gamma(x,y) = [ n \dot (x-y)/|x-y| ] * [ C(d,s) / (2s) / |x-y|^{d-1+2s} ] +# # \--------------------------------/ +# # | +# # boundaryKernel +# # +# # n is independent of x and y +# if dim == 2: +# self.n[0] = simplex2[1, 1] - simplex2[0, 1] +# self.n[1] = simplex2[0, 0] - simplex2[1, 0] +# # F is same as vol2 +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# elif dim == 3: +# for j in range(dim): +# self.x[0, j] = simplex2[1, j]-simplex2[0, j] +# for j in range(dim): +# self.x[1, j] = simplex2[2, j]-simplex2[0, j] +# self.n[0] = self.x[0, 1]*self.x[1, 2]-self.x[0, 2]*self.x[1, 1] +# self.n[1] = self.x[0, 2]*self.x[1, 0]-self.x[0, 0]*self.x[1, 2] +# self.n[2] = self.x[0, 0]*self.x[1, 1]-self.x[0, 1]*self.x[1, 0] +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# self.n[2] *= val + +# contrib[:] = 0. + +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PHI = sQR.PHI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# for k in range(qr2.rule1.num_nodes): +# for m in range(qr2.rule2.num_nodes): +# if dim == 1: +# nw = 1. +# else: +# normW = 0. +# for j in range(dim): +# self.w[j] = self.y[m, j]-self.x[k, j] +# normW += self.w[j]**2 +# normW = 1./sqrt(normW) +# for j in range(dim): +# self.w[j] *= normW +# nw = mydot(self.n, self.w) +# i = k*qr2.rule2.num_nodes+m +# self.temp[i] = qr2.weights[i] * nw * self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0]) * normW**2 + +# k = 0 +# for I in range(self.DoFMap.dofs_per_element): +# for p in range(dim): +# for J in range(I, self.DoFMap.dofs_per_element): +# if I == J: +# start = p +# else: +# start = 0 +# for q in range(start, dim): +# if mask[k]: +# val = 0. 
+# m = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# val += self.temp[m] * PHI[I, m] * (self.x[i, p]-self.y[j, p]) * PHI[J, m] * (self.x[i, q]-self.y[j, q]) +# m += 1 +# contrib[k] = val*vol +# k += 1 + +# cdef void eval_distant_vector(self, +# REAL_t[:, ::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, i, j, I, J, m +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PSI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# INDEX_t vectorSize = self.kernel.vectorSize +# REAL_t c1, c2, PSI_I, PSI_J +# transformQuadratureRule qr0trans, qr1trans +# INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 +# REAL_t a_b1[3] +# REAL_t a_A1[3][3] +# REAL_t a_A2[3][3] +# REAL_t[::1] b1 +# REAL_t[:, ::1] A1, A2 +# BOOL_t cutElements = False + +# if self.kernel.finiteHorizon: +# # check if the horizon might cut the elements +# if self.kernel.interaction.relPos == CUT: +# cutElements = True +# if self.kernel.complement: +# cutElements = False +# # TODO: cutElements should be set to True, but +# # need to figure out the element +# # transformation. + +# contrib[:, :] = 0. 
+ +# if not cutElements: +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PSI = sQR.PSI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# k = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# self.kernel.evalVectorPtr(dim, +# &self.x[i, 0], +# &self.y[j, 0], +# vectorSize, +# &self.vec[0]) +# for m in range(vectorSize): +# self.tempVector[k, m] = qr2.weights[k]*self.vec[m] +# k += 1 + +# k = 0 +# for I in range(2*self.DoFMap.dofs_per_element): +# for J in range(I, 2*self.DoFMap.dofs_per_element): +# if mask[k]: +# for m in range(vectorSize): +# self.vec[m] = 0. +# for i in range(qr2.num_nodes): +# for m in range(vectorSize): +# self.vec[m] += self.tempVector[i, m] * PSI[I, i] * PSI[J, i] +# for m in range(vectorSize): +# contrib[k, m] = self.vec[m]*vol +# k += 1 +# else: +# raise NotImplementedError() + +# cdef void eval_distant_nonsym_vector(self, +# REAL_t[:, ::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, i, j, I, J, l +# REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PSI +# REAL_t[:, :, ::1] PHI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# BOOL_t cutElements = False +# REAL_t w +# INDEX_t vectorSize = self.kernel.vectorSize + +# if self.kernel.finiteHorizon: +# # check if the horizon might cut the elements +# if self.kernel.interaction.relPos == CUT: +# cutElements = True +# if self.kernel.complement: +# cutElements = False +# # TODO: cutElements should be set to True, but +# # need to figure out the element +# # transformation. + +# contrib[:, :] = 0. 
+ +# if not cutElements: +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PSI = sQR.PSI +# PHI = sQR.PHI3 +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# k = 0 +# for i in range(qr2.rule1.num_nodes): +# for j in range(qr2.rule2.num_nodes): +# w = qr2.weights[k] +# self.kernel.evalVectorPtr(dim, +# &self.x[i, 0], +# &self.y[j, 0], +# vectorSize, +# &self.vec[0]) +# self.kernel.evalVectorPtr(dim, +# &self.y[j, 0], +# &self.x[i, 0], +# vectorSize, +# &self.vec2[0]) +# for l in range(vectorSize): +# self.tempVector[k, l] = w * self.vec[l] +# self.tempVector2[k, l] = w * self.vec2[l] +# k += 1 + +# k = 0 +# for I in range(2*self.DoFMap.dofs_per_element): +# for J in range(2*self.DoFMap.dofs_per_element): +# if mask[k]: +# for l in range(vectorSize): +# self.vec[l] = 0. +# for i in range(qr2.num_nodes): +# for l in range(vectorSize): +# self.vec[l] += (self.tempVector[i, l] * PHI[0, I, i] - self.tempVector2[i, l] * PHI[1, I, i]) * PSI[J, i] +# for l in range(vectorSize): +# contrib[k, l] = self.vec[l]*vol +# k += 1 +# else: +# raise NotImplementedError() + +# cdef void eval_distant_boundary_vector(self, +# REAL_t[:, ::1] contrib, +# panelType panel, +# MASK_t mask=ALL): +# cdef: +# INDEX_t k, m, i, j, I, J +# REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 +# doubleSimplexQuadratureRule qr2 +# REAL_t[:, ::1] PHI +# REAL_t[:, ::1] simplex1 = self.simplex1 +# REAL_t[:, ::1] simplex2 = self.simplex2 +# INDEX_t dim = simplex1.shape[1] +# REAL_t normW, nw +# INDEX_t vectorSize = self.kernel.vectorSize + +# # Kernel: +# # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} +# # with inward normal n. 
+# # +# # Rewrite as +# # \Gamma(x,y) = [ n \dot (x-y)/|x-y| ] * [ C(d,s) / (2s) / |x-y|^{d-1+2s} ] +# # \--------------------------------/ +# # | +# # boundaryKernel +# # +# # n is independent of x and y +# if dim == 2: +# self.n[0] = simplex2[1, 1] - simplex2[0, 1] +# self.n[1] = simplex2[0, 0] - simplex2[1, 0] +# # F is same as vol2 +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# elif dim == 3: +# for j in range(dim): +# self.x[0, j] = simplex2[1, j]-simplex2[0, j] +# for j in range(dim): +# self.x[1, j] = simplex2[2, j]-simplex2[0, j] +# self.n[0] = self.x[0, 1]*self.x[1, 2]-self.x[0, 2]*self.x[1, 1] +# self.n[1] = self.x[0, 2]*self.x[1, 0]-self.x[0, 0]*self.x[1, 2] +# self.n[2] = self.x[0, 0]*self.x[1, 1]-self.x[0, 1]*self.x[1, 0] +# val = 1./sqrt(mydot(self.n, self.n)) +# self.n[0] *= val +# self.n[1] *= val +# self.n[2] *= val + +# contrib[:, :] = 0. + +# vol = vol1*vol2 +# if panel < 0: +# sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) +# else: +# sQR = (self.distantQuadRulesPtr[panel]) +# qr2 = (sQR.qr) +# PHI = sQR.PHI +# qr2.rule1.nodesInGlobalCoords(simplex1, self.x) +# qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + +# for k in range(qr2.rule1.num_nodes): +# for m in range(qr2.rule2.num_nodes): +# if dim == 1: +# nw = 1. +# else: +# normW = 0. +# for j in range(dim): +# self.w[j] = self.y[m, j]-self.x[k, j] +# normW += self.w[j]**2 +# normW = 1./sqrt(normW) +# for j in range(dim): +# self.w[j] *= normW +# nw = mydot(self.n, self.w) +# i = k*qr2.rule2.num_nodes+m +# self.kernel.evalVectorPtr(dim, &self.x[k, 0], &self.y[m, 0], vectorSize, &self.vec[0]) +# for m in range(vectorSize): +# self.tempVector[i, m] = qr2.weights[i] * nw * self.vec[m] + +# k = 0 +# for I in range(self.DoFMap.dofs_per_element): +# for J in range(I, self.DoFMap.dofs_per_element): +# if mask[k]: +# for m in range(vectorSize): +# self.vec[m] = 0. 
+# for i in range(qr2.num_nodes): +# for m in range(vectorSize): +# self.vec[m] += self.tempVector[i, m] * PHI[I, i] * PHI[J, i] +# for m in range(vectorSize): +# contrib[k, m] = self.vec[m]*vol +# k += 1 +# # release cdef class nonlocalLaplacian1D(nonlocalLaplacian): diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase_decl_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalLaplacianBase_decl_{SCALAR}.pxi new file mode 100644 index 00000000..4d761ae1 --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase_decl_{SCALAR}.pxi @@ -0,0 +1,91 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class {SCALAR_label}double_local_matrix_t: + cdef: + REAL_t[:, ::1] vertices1, vertices2 + INDEX_t[:, ::1] cells1, cells2 + public dict distantQuadRules + public DoFMap DoFMap + INDEX_t dim + INDEX_t manifold_dim1, manifold_dim2 + public bint symmetricLocalMatrix + public bint symmetricCells + public INDEX_t cellNo1, cellNo2 + REAL_t[:, :, ::1] precomputedSimplices + REAL_t[:, ::1] precomputedCenters + REAL_t[::1] precomputedVolumes + REAL_t[::1] precomputedH + REAL_t[:, ::1] simplex1, simplex2 + REAL_t[::1] center1, center2 + volume_t volume1, volume2 + public REAL_t vol1, vol2 + panelType IDENTICAL + REAL_t dmin2, dmax2, dcenter2 + REAL_t h1MaxInv, h2MaxInv, dMaxInv + PermutationIndexer pI_volume, pI_surface + public INDEX_t[::1] perm1, perm2, perm + public INDEX_t[:, ::1] precomputedVolumeSimplexPermutations + public INDEX_t[:, ::1] precomputedSurfaceSimplexPermutations + public INDEX_t[:, ::1] precomputedDoFPermutations + cdef void precomputePermutations(self) + cdef 
void precomputeSimplices(self) + cdef INDEX_t getCellPairIdentifierSize(self) + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm) + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm) + cdef void setMesh1(self, meshBase mesh1) + cdef void setMesh2(self, meshBase mesh2) + cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1) + cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2) + cdef void setCell1(self, INDEX_t cellNo1) + cdef void setCell2(self, INDEX_t cellNo2) + cdef void setSimplex1(self, REAL_t[:, ::1] simplex1) + cdef void setSimplex2(self, REAL_t[:, ::1] simplex2) + cdef void swapCells(self) + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=*) + cdef void evalVector(self, + {SCALAR}_t[:, ::1] contrib, + panelType panel, + MASK_t mask=*) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d) + cdef panelType getProtoPanelType(self) + cdef void computeCenterDistance(self) + cdef void computeExtremeDistances(self) + cpdef panelType getPanelType(self) + cdef void addQuadRule(self, panelType panel) + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex) + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex) + cdef void getSimplexCenter(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] center) + + +cdef class {SCALAR_label}nonlocalLaplacian({SCALAR_label}double_local_matrix_t): + cdef: + public REAL_t H0, hmin, num_dofs + void* localShapeFunctions + dict specialQuadRules + public {SCALAR_label}Kernel kernel + REAL_t[:, ::1] x, y + void** distantQuadRulesPtr + {SCALAR}_t[::1] temp, temp2 + public REAL_t[::1] n, w + cdef void getNearQuadRule(self, panelType panel) + cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof) + cdef void addQuadRule(self, panelType panel) + cdef void addQuadRule_nonSym(self, panelType panel) + cdef void 
addQuadRule_boundary(self, panelType panel) + cdef void getNonSingularNearQuadRule(self, panelType panel) + cdef void eval_distant(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) + cdef void eval_distant_nonsym(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) + cdef void eval_distant_boundary(self, {SCALAR}_t[::1] contrib, panelType panel, MASK_t mask=*) diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase_{SCALAR}.pxi b/nl/PyNucleus_nl/nonlocalLaplacianBase_{SCALAR}.pxi new file mode 100644 index 00000000..765b7f46 --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase_{SCALAR}.pxi @@ -0,0 +1,1055 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + +cdef class {SCALAR_label}double_local_matrix_t: + def __init__(self, INDEX_t dim, INDEX_t manifold_dim1, INDEX_t manifold_dim2, DoFMap dm): + self.distantQuadRules = {} + self.dim = dim + self.manifold_dim1 = manifold_dim1 + self.manifold_dim2 = manifold_dim2 + self.symmetricLocalMatrix = True + self.symmetricCells = True + self.cellNo1 = -1 + self.cellNo2 = -1 + self.vol1 = np.nan + self.vol2 = np.nan + + self.DoFMap = dm + self.precomputePermutations() + + if dim == 1 and manifold_dim1 == 1: + self.volume1 = volume1Dsimplex + elif dim == 1 and manifold_dim1 == 0: + self.volume1 = volume0Dsimplex + elif dim == 2 and manifold_dim1 == 2: + self.volume1 = volume2Dsimplex + elif dim == 2 and manifold_dim1 == 1: + self.volume1 = volume1Din2Dsimplex + elif dim == 3 and manifold_dim1 == 3: + self.volume1 = volume3Dsimplex + elif dim == 3 and manifold_dim1 == 2: + self.volume1 = volume2Din3Dsimplex + else: + raise NotImplementedError() + + if dim == 1 and manifold_dim2 == 1: + self.volume2 = volume1Dsimplex + elif dim == 1 and manifold_dim2 == 0: + self.volume2 = volume0Dsimplex + elif dim == 2 and manifold_dim2 == 2: + self.volume2 = volume2Dsimplex + elif dim == 2 and manifold_dim2 == 1: + self.volume2 = volume1Din2Dsimplex + elif dim == 3 and manifold_dim2 == 3: + self.volume2 = volume3Dsimplex + elif dim == 3 and manifold_dim2 == 2: + self.volume2 = volume2Din3Dsimplex + else: + raise NotImplementedError() + + if manifold_dim1 == 1: + self.IDENTICAL = COMMON_EDGE + elif manifold_dim1 == 2: + self.IDENTICAL = COMMON_FACE + elif manifold_dim1 == 3: + self.IDENTICAL = COMMON_VOLUME + else: + raise NotImplementedError() + + self.center1 = uninitialized((self.dim), dtype=REAL) + self.center2 = uninitialized((self.dim), dtype=REAL) + + cdef void precomputePermutations(self): + cdef: + INDEX_t[:, ::1] perms, surface_perms + INDEX_t r, j, dofPerm, dofOrig, index, k + tuple permTuple + 
INDEX_t[::1] perm + REAL_t eps = 1e-10 + INDEX_t dim = self.DoFMap.mesh.manifold_dim + + # np.math was an alias of the standard-library math module that NumPy 2.0 removed, so take factorial from math directly + from math import factorial + + perms = uninitialized((factorial(dim+1), dim+1), dtype=INDEX) + surface_perms = uninitialized((factorial(dim), dim), dtype=INDEX) + + from itertools import permutations + + # store every permutation of the dim+1 vertex indices in the row given by its rank + self.pI_volume = PermutationIndexer(dim+1) + for permTuple in permutations(range(dim+1)): + perm = np.array(permTuple, dtype=INDEX) + index = self.pI_volume.rank(perm) + for k in range(dim+1): + perms[index, k] = perm[k] + + # same table for the permutations of dim indices + self.pI_surface = PermutationIndexer(dim) + for permTuple in permutations(range(dim)): + perm = np.array(permTuple, dtype=INDEX) + index = self.pI_surface.rank(perm) + for k in range(dim): + surface_perms[index, k] = perm[k] + + self.precomputedVolumeSimplexPermutations = perms + self.precomputedSurfaceSimplexPermutations = surface_perms + self.precomputedDoFPermutations = uninitialized((perms.shape[0], + self.DoFMap.dofs_per_element), dtype=INDEX) + # for each vertex permutation r, match every DoF to the DoF whose node coordinates agree after permuting by perms[r] (up to eps) + for r in range(perms.shape[0]): + for dofPerm in range(self.DoFMap.dofs_per_element): + for dofOrig in range(self.DoFMap.dofs_per_element): + for j in range(dim+1): + if abs(self.DoFMap.nodes[dofPerm, j]-self.DoFMap.nodes[dofOrig, perms[r, j]]) > eps: + break + else: + self.precomputedDoFPermutations[r, dofPerm] = dofOrig + break + else: + # We should never get here + raise NotImplementedError() + + cdef void precomputeSimplices(self): + # mesh1 and mesh 2 will be the same + cdef: + INDEX_t cellNo1 + INDEX_t m, k, l + REAL_t fac = 1./self.cells1.shape[1] + self.precomputedSimplices = uninitialized((self.cells1.shape[0], self.cells1.shape[1], self.dim), dtype=REAL) + self.precomputedCenters = np.zeros((self.cells1.shape[0], self.dim), dtype=REAL) + for cellNo1 in range(self.cells1.shape[0]): + for m in range(self.cells1.shape[1]): + k = self.cells1[cellNo1, m] + for l in range(self.vertices1.shape[1]): + self.precomputedSimplices[cellNo1, m, l] = self.vertices1[k, l] + self.precomputedCenters[cellNo1, l] += self.vertices1[k,
l] + for l in range(self.vertices1.shape[1]): + self.precomputedCenters[cellNo1, l] *= fac + + cdef INDEX_t getCellPairIdentifierSize(self): + return -1 + + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm): + raise NotImplementedError() + + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): + self.computeCellPairIdentifierBase(ID, perm) + + def computeCellPairIdentifier_py(self): + cdef: + INDEX_t perm = 0 + ID = uninitialized((self.getCellPairIdentifierSize()), dtype=INDEX) + self.computeCellPairIdentifier(ID, &perm) + return ID, perm + + cdef void setMesh1(self, meshBase mesh1): + self.setVerticesCells1(mesh1.vertices, mesh1.cells) + self.precomputedVolumes = mesh1.volVector + self.precomputedH = mesh1.hVector + h1 = 2.*mesh1.h + d = 2.*mesh1.diam + self.h1MaxInv = 1./h1 + self.dMaxInv = 1./d + + cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1): + self.vertices1 = vertices1 + self.cells1 = cells1 + self.simplex1 = uninitialized((self.cells1.shape[1], self.dim), dtype=REAL) + self.perm1 = uninitialized((self.cells1.shape[1]), dtype=INDEX) + self.cellNo1 = -1 + self.cellNo2 = -1 + if self.symmetricCells: + # mesh1 and mesh 2 will be the same + self.precomputeSimplices() + + cdef void setMesh2(self, meshBase mesh2): + self.setVerticesCells2(mesh2.vertices, mesh2.cells) + if mesh2.manifold_dim > 0: + h2 = 2.*mesh2.h + self.h2MaxInv = 1./h2 + else: + self.h2MaxInv = 1. 
+ + cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2): + self.vertices2 = vertices2 + self.cells2 = cells2 + self.simplex2 = uninitialized((self.cells2.shape[1], self.dim), dtype=REAL) + self.perm2 = uninitialized((self.cells2.shape[1]), dtype=INDEX) + self.perm = uninitialized((2*self.DoFMap.dofs_per_element), dtype=INDEX) + self.cellNo1 = -1 + self.cellNo2 = -1 + + cdef void setCell1(self, INDEX_t cellNo1): + if self.cellNo1 == cellNo1: + return + self.cellNo1 = cellNo1 + if not self.symmetricCells: + getSimplexAndCenter(self.cells1, self.vertices1, self.cellNo1, self.simplex1, self.center1) + self.vol1 = self.volume1(self.simplex1) + else: + self.simplex1 = self.precomputedSimplices[cellNo1, :, :] + self.center1 = self.precomputedCenters[cellNo1, :] + self.vol1 = self.precomputedVolumes[cellNo1] + + cdef void setCell2(self, INDEX_t cellNo2): + if self.cellNo2 == cellNo2: + return + self.cellNo2 = cellNo2 + if not self.symmetricCells: + getSimplexAndCenter(self.cells2, self.vertices2, self.cellNo2, self.simplex2, self.center2) + self.vol2 = self.volume2(self.simplex2) + else: + self.simplex2 = self.precomputedSimplices[cellNo2, :, :] + self.center2 = self.precomputedCenters[cellNo2, :] + self.vol2 = self.precomputedVolumes[cellNo2] + + def setMesh1_py(self, meshBase mesh1): + self.setMesh1(mesh1) + + def setMesh2_py(self, meshBase mesh2): + self.setMesh2(mesh2) + + def setCell1_py(self, INDEX_t cellNo1): + self.setCell1(cellNo1) + + def setCell2_py(self, INDEX_t cellNo2): + self.setCell2(cellNo2) + + cdef void swapCells(self): + self.cellNo1, self.cellNo2 = self.cellNo2, self.cellNo1 + self.simplex1, self.simplex2 = self.simplex2, self.simplex1 + self.center1, self.center2 = self.center2, self.center1 + + cdef void setSimplex1(self, REAL_t[:, ::1] simplex1): + self.simplex1 = simplex1 + self.getSimplexCenter(self.simplex1, self.center1) + self.vol1 = self.volume1(self.simplex1) + + cdef void setSimplex2(self, REAL_t[:, ::1] 
simplex2): + self.simplex2 = simplex2 + self.getSimplexCenter(self.simplex2, self.center2) + self.vol2 = self.volume2(self.simplex2) + + def __call__(self, + {SCALAR}_t[::1] contrib, + panelType panel): + return self.eval(contrib, panel) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + raise NotImplementedError() + + cdef void evalVector(self, + {SCALAR}_t[:, ::1] contrib, + panelType panel, + MASK_t mask=ALL): + raise NotImplementedError() + + def eval_py(self, + {SCALAR}_t[::1] contrib, + panel): + self.eval(contrib, panel, ALL) + + def evalVector_py(self, + {SCALAR}_t[:, ::1] contrib, + panel): + self.evalVector(contrib, panel, ALL) + + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + raise NotImplementedError() + + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): + raise NotImplementedError() + + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex): + raise NotImplementedError() + + cdef void getSimplexCenter(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] center): + cdef: + INDEX_t i, j + REAL_t fac + center[:] = 0. 
+ for i in range(simplex.shape[0]): + for j in range(simplex.shape[1]): + center[j] += simplex[i, j] + fac = 1./simplex.shape[0] + for j in range(simplex.shape[1]): + center[j] *= fac + + cdef panelType getProtoPanelType(self): + # Given two cells, determines their relationship: + # - COMMON_FACE + # - COMMON_EDGE + # - COMMON_VERTEX + # - DISTANT + cdef: + INDEX_t mask1 = 0, mask2 = 0 + INDEX_t numVertices1 = self.cells1.shape[1] + INDEX_t numVertices2 = self.cells2.shape[1] + INDEX_t vertexNo1, vertexNo2, vertex1, vertex2 + INDEX_t commonVertices = 0 + INDEX_t k, i + INDEX_t dofs_per_vertex, dofs_per_edge, dofs_per_face, dofs_per_element = self.DoFMap.dofs_per_element + panelType panel + INDEX_t chosenPermutation + if self.symmetricCells: + if self.cellNo1 > self.cellNo2: + return IGNORED + + if (self.cells1.shape[1] == self.cells2.shape[1]) and (self.cellNo1 == self.cellNo2): + for k in range(numVertices1): + self.perm1[k] = k + for k in range(numVertices2): + self.perm2[k] = k + for k in range(dofs_per_element): + self.perm[k] = k + return self.IDENTICAL + + # now the two simplices can share at most numVertices1-1 vertices + + for vertexNo1 in range(numVertices1): + vertex1 = self.cells1[self.cellNo1, vertexNo1] + for vertexNo2 in range(numVertices2): + if mask2 & (1 << vertexNo2): + continue + vertex2 = self.cells2[self.cellNo2, vertexNo2] + if vertex1 == vertex2: + self.perm1[commonVertices] = vertexNo1 + self.perm2[commonVertices] = vertexNo2 + mask1 += (1 << vertexNo1) + mask2 += (1 << vertexNo2) + commonVertices += 1 + break + + if commonVertices == 0: + for k in range(numVertices1): + self.perm1[k] = k + for k in range(numVertices2): + self.perm2[k] = k + for k in range(dofs_per_element): + self.perm[k] = k + return 0 + else: + i = 0 + for k in range(commonVertices, numVertices1): + while mask1 & (1 << i): + i += 1 + self.perm1[k] = i + mask1 += (1 << i) + + i = 0 + for k in range(commonVertices, numVertices2): + while mask2 & (1 << i): + i += 1 + 
self.perm2[k] = i + mask2 += (1 << i) + + # we now have set permutations for the two simplices + # we have at least one shared vertex + + chosenPermutation = self.pI_volume.rank(self.perm1) + for k in range(dofs_per_element): + self.perm[k] = self.precomputedDoFPermutations[chosenPermutation, k] + + if numVertices1 == numVertices2: + dofs_per_vertex = self.DoFMap.dofs_per_vertex + dofs_per_edge = self.DoFMap.dofs_per_edge + dofs_per_face = self.DoFMap.dofs_per_face + + chosenPermutation = self.pI_volume.rank(self.perm2) + if commonVertices == 1: + for k in range(dofs_per_vertex, dofs_per_element): + self.perm[dofs_per_element+k-dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + elif commonVertices == 2: + for k in range(2*dofs_per_vertex, numVertices2*dofs_per_vertex): + self.perm[dofs_per_element+k-2*dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + for k in range(numVertices2*dofs_per_vertex+dofs_per_edge, dofs_per_element): + self.perm[dofs_per_element+k-2*dofs_per_vertex-dofs_per_edge] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + elif commonVertices == 3: + # only in 3d + for k in range(3*dofs_per_vertex, numVertices2*dofs_per_vertex): + self.perm[dofs_per_element+k-3*dofs_per_vertex] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + for k in range(numVertices2*dofs_per_vertex+3*dofs_per_edge, numVertices2*dofs_per_vertex+6*dofs_per_edge): + self.perm[dofs_per_element+k-3*dofs_per_vertex-3*dofs_per_edge] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + for k in range(numVertices2*dofs_per_vertex+6*dofs_per_edge+dofs_per_face, dofs_per_element): + self.perm[dofs_per_element+k-3*dofs_per_vertex-3*dofs_per_edge-dofs_per_face] = dofs_per_element+self.precomputedDoFPermutations[chosenPermutation, k] + panel = -commonVertices + return panel + + cdef void computeCenterDistance(self): + cdef: + INDEX_t j + 
REAL_t d2 = 0. + for j in range(self.dim): + d2 += (self.center1[j]-self.center2[j])**2 + self.dcenter2 = d2 + + cdef void computeExtremeDistances(self): + cdef: + INDEX_t i, k, j + INDEX_t noSimplex1 = self.simplex1.shape[0] + INDEX_t noSimplex2 = self.simplex2.shape[0] + REAL_t d2 + REAL_t dmin2 = inf + REAL_t dmax2 = 0. + for i in range(noSimplex1): + for k in range(noSimplex2): + d2 = 0. + for j in range(self.dim): + d2 += (self.simplex1[i, j] - self.simplex2[k, j])**2 + dmin2 = min(dmin2, d2) + dmax2 = max(dmax2, d2) + self.dmin2 = dmin2 + self.dmax2 = dmax2 + + cpdef panelType getPanelType(self): + raise NotImplementedError() + + cdef void addQuadRule(self, panelType panel): + raise NotImplementedError() + + def addQuadRule_py(self, panelType panel): + self.addQuadRule(panel) + + def __repr__(self): + return '{}\n'.format(self.__class__.__name__) + + +cdef class {SCALAR_label}nonlocalLaplacian({SCALAR_label}double_local_matrix_t): + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, DoFMap dm, + num_dofs=None, INDEX_t manifold_dim2=-1): + cdef: + shapeFunction sf + INDEX_t i + if manifold_dim2 < 0: + manifold_dim2 = mesh.manifold_dim + {SCALAR_label}double_local_matrix_t.__init__(self, mesh.dim, mesh.manifold_dim, manifold_dim2, dm) + if num_dofs is None: + self.num_dofs = dm.num_dofs + else: + self.num_dofs = num_dofs + self.hmin = mesh.hmin + self.H0 = mesh.diam/sqrt(8) + self.localShapeFunctions = malloc(self.DoFMap.dofs_per_element*sizeof(void*)) + for i in range(self.DoFMap.dofs_per_element): + sf = dm.localShapeFunctions[i] + ((self.localShapeFunctions+i*sizeof(void*)))[0] = sf + self.specialQuadRules = {} + self.distantQuadRulesPtr = malloc(MAX_PANEL*sizeof(void*)) + for i in range(MAX_PANEL): + self.distantQuadRulesPtr[i] = NULL + + self.x = uninitialized((0, self.dim), dtype=REAL) + self.y = uninitialized((0, self.dim), dtype=REAL) + self.temp = uninitialized((0), dtype={SCALAR}) + + self.n = uninitialized((self.dim), dtype=REAL) + 
self.w = uninitialized((self.dim), dtype=REAL) + + self.kernel = kernel + + if self.kernel.variable: + self.symmetricCells = self.kernel.symmetric + self.symmetricLocalMatrix = self.kernel.symmetric + else: + self.symmetricCells = True + self.symmetricLocalMatrix = True + + if self.kernel.variableHorizon: + self.symmetricCells = False + + # __dealloc__ is the extension-type hook that always runs on destruction; __del__ is only called for cdef classes from Cython 3 on, so the malloc'ed tables would leak under older Cython + def __dealloc__(self): + free(self.localShapeFunctions) + free(self.distantQuadRulesPtr) + + cdef void getNearQuadRule(self, panelType panel): + raise NotImplementedError() + + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): + assert not self.kernel.variable + if self.kernel.finiteHorizon: + self.computeExtremeDistances() + if self.dmax2 <= self.kernel.getHorizonValue2(): + # entirely within horizon + self.computeCellPairIdentifierBase(ID, perm) + elif self.dmin2 >= self.kernel.getHorizonValue2(): + # entirely outside of horizon + ID[0] = IGNORED + else: + # on horizon + ID[0] = ON_HORIZON + else: + self.computeCellPairIdentifierBase(ID, perm) + + cpdef panelType getPanelType(self): + # Given two cells, determines their relationship: + # - COMMON_FACE + # - COMMON_EDGE + # - COMMON_VERTEX + # - DISTANT + # - IGNORED + cdef: + panelType panel + REAL_t d, h1, h2 + REAL_t alpha + panel = self.getProtoPanelType() + + if panel == IGNORED: + return IGNORED + + if self.kernel.variable: + if self.kernel.piecewise: + self.kernel.evalParams(self.center1, self.center2) + else: + self.kernel.evalParamsOnSimplices(self.center1, self.center2, self.simplex1, self.simplex2) + + if panel == DISTANT: + if self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) == REMOTE: + return IGNORED + + self.computeCenterDistance() + d = sqrt(self.dcenter2) + + if self.symmetricCells: + h1 = self.precomputedH[self.cellNo1] + else: + h1 = self.get_h_simplex(self.simplex1) + if self.cells1.shape[1] == self.cells2.shape[1]: + if self.symmetricCells: + h2 = self.precomputedH[self.cellNo2] + else: + h2 =
self.get_h_simplex(self.simplex2) + else: + h2 = self.get_h_surface_simplex(self.simplex2) + panel = self.getQuadOrder(h1, h2, d) + elif self.kernel.variable: + self.getNearQuadRule(panel) + if panel < 0: + alpha = self.kernel.getSingularityValue() + if alpha == 0.: + self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) + return panel + + def __repr__(self): + # name the templated class itself so that instantiations with a non-empty {SCALAR_label} do not hand super() the wrong class + return (super({SCALAR_label}nonlocalLaplacian, self).__repr__() + + 'kernel: {}\n'.format(self.kernel)) + + cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof): + return ((((self.localShapeFunctions+local_dof*sizeof(void*)))[0])) + + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + specialQuadRule sQR + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + qr0 = simplexXiaoGimbutas(panel, self.dim, self.manifold_dim1) + qr1 = qr0 + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I, k] = sf(qr0.nodes[:, i]) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) + k += 1 + sQR = specialQuadRule(qr2, PSI) + self.distantQuadRules[panel] = sQR + self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + + if numQuadNodes0 > self.x.shape[0]: + self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) + if numQuadNodes1 > self.y.shape[0]: + self.y =
uninitialized((numQuadNodes1, self.dim), dtype=REAL) + if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: + self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) + + cdef void addQuadRule_nonSym(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + specialQuadRule sQR + REAL_t[:, ::1] PSI + REAL_t[:, :, ::1] PHI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + qr0 = simplexXiaoGimbutas(panel, self.dim, self.manifold_dim1) + qr1 = qr0 + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + PHI = uninitialized((2, + 2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PHI[0, I, k] = sf(qr0.nodes[:, i]) + PHI[1, I, k] = 0. + PSI[I, k] = PHI[0, I, k] + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PHI[0, I+dofs_per_element, k] = 0. 
+ PHI[1, I+dofs_per_element, k] = sf(qr1.nodes[:, j]) + PSI[I+dofs_per_element, k] = -PHI[1, I+dofs_per_element, k] + k += 1 + sQR = specialQuadRule(qr2, PSI, PHI3=PHI) + self.distantQuadRules[panel] = sQR + self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + + if numQuadNodes0 > self.x.shape[0]: + self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) + if numQuadNodes1 > self.y.shape[0]: + self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) + if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: + self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) + self.temp2 = uninitialized((numQuadNodes0*numQuadNodes1), dtype={SCALAR}) + + cdef void getNonSingularNearQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + specialQuadRule sQR + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + try: + sQR = (self.distantQuadRules[MAX_PANEL+panel]) + except KeyError: + quadOrder = max(ceil(self.target_order), 2) + qr0 = simplexXiaoGimbutas(quadOrder, self.dim) + qr1 = qr0 + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), + dtype=REAL) + + # phi_i(x) - phi_i(y) = phi_i(x) + for I in range(dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I, k] = sf(qr0.nodes[:, i]) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) + for I in range(dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) + k += 1 + sQR = specialQuadRule(qr2, PSI) + self.distantQuadRules[MAX_PANEL+panel] = sQR + self.distantQuadRulesPtr[MAX_PANEL+panel] = 
(self.distantQuadRules[MAX_PANEL+panel]) + if numQuadNodes0 > self.x.shape[0]: + self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) + if numQuadNodes1 > self.y.shape[0]: + self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) + if qr2.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr2.num_nodes), dtype={SCALAR}) + + cdef void eval_distant(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + INDEX_t dim = simplex1.shape[1] + REAL_t c1, c2, PSI_I, PSI_J + transformQuadratureRule qr0trans, qr1trans + INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 + REAL_t a_b1[3] + REAL_t a_A1[3][3] + REAL_t a_A2[3][3] + REAL_t[::1] b1 + REAL_t[:, ::1] A1, A2 + BOOL_t cutElements = False + + if self.kernel.finiteHorizon: + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + contrib[:] = 0. + + if not cutElements: + vol = vol1*vol2 + if panel < 0: + sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) + else: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PSI = sQR.PSI + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(dim, + &self.x[i, 0], + &self.y[j, 0]) + k += 1 + + k = 0 + for I in range(2*self.DoFMap.dofs_per_element): + for J in range(I, 2*self.DoFMap.dofs_per_element): + if mask[k]: + val = 0. 
+ for i in range(qr2.num_nodes): + val += self.temp[i] * PSI[I, i] * PSI[J, i] + contrib[k] = val*vol + k += 1 + else: + if panel < 0: + sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) + else: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + if sQR.qrTransformed0 is not None: + qr0trans = sQR.qrTransformed0 + else: + qr0 = qr2.rule1 + qr0trans = transformQuadratureRule(qr0) + sQR.qrTransformed0 = qr0trans + if sQR.qrTransformed1 is not None: + qr1trans = sQR.qrTransformed1 + else: + qr1 = qr2.rule2 + qr1trans = transformQuadratureRule(qr1) + sQR.qrTransformed1 = qr1trans + + numQuadNodes0 = qr0trans.num_nodes + numQuadNodes1 = qr1trans.num_nodes + + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + b1 = a_b1 + A2 = a_A2 + + self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): + qr0trans.setAffineBaryTransform(A1, b1) + qr0trans.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0trans.num_nodes): + self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) + while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): + qr1trans.setLinearBaryTransform(A2) + qr1trans.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1trans.num_nodes): + val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) + val *= c1 * c2 * vol + k = 0 + for I in range(2*dofs_per_element): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) + for J in range(I, 2*dofs_per_element): + if mask[k]: + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], 
numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + + + cdef void eval_distant_nonsym(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val, val2 + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + REAL_t[:, :, ::1] PHI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + INDEX_t dim = simplex1.shape[1] + BOOL_t cutElements = False + REAL_t w + REAL_t c1, c2, PHI_I_0, PHI_I_1, PSI_J + transformQuadratureRule qr0trans, qr1trans + INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 + REAL_t a_b1[3] + REAL_t a_A1[3][3] + REAL_t a_A2[3][3] + REAL_t[::1] b1 + REAL_t[:, ::1] A1, A2 + + if self.kernel.finiteHorizon: + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + contrib[:] = 0. + + if not cutElements: + vol = vol1*vol2 + if panel < 0: + sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) + else: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PSI = sQR.PSI + PHI = sQR.PHI3 + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + w = qr2.weights[k] + self.temp[k] = w * self.kernel.evalPtr(dim, + &self.x[i, 0], + &self.y[j, 0]) + self.temp2[k] = w * self.kernel.evalPtr(dim, + &self.y[j, 0], + &self.x[i, 0]) + k += 1 + + k = 0 + for I in range(2*self.DoFMap.dofs_per_element): + for J in range(2*self.DoFMap.dofs_per_element): + if mask[k]: + val = 0. 
+ for i in range(qr2.num_nodes): + val += (self.temp[i] * PHI[0, I, i] - self.temp2[i] * PHI[1, I, i]) * PSI[J, i] + contrib[k] = val*vol + k += 1 + else: + if panel < 0: + sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) + else: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + if sQR.qrTransformed0 is not None: + qr0trans = sQR.qrTransformed0 + else: + qr0 = qr2.rule1 + qr0trans = transformQuadratureRule(qr0) + sQR.qrTransformed0 = qr0trans + if sQR.qrTransformed1 is not None: + qr1trans = sQR.qrTransformed1 + else: + qr1 = qr2.rule2 + qr1trans = transformQuadratureRule(qr1) + sQR.qrTransformed1 = qr1trans + + numQuadNodes0 = qr0trans.num_nodes + numQuadNodes1 = qr1trans.num_nodes + + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + b1 = a_b1 + A2 = a_A2 + + self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): + qr0trans.setAffineBaryTransform(A1, b1) + qr0trans.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0trans.num_nodes): + self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) + while self.kernel.interaction.nextSubSimplex_Node(A2, &c2): + qr1trans.setLinearBaryTransform(A2) + qr1trans.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1trans.num_nodes): + w = qr0trans.weights[i]*qr1trans.weights[j]*c1 * c2 * vol + val = w*self.kernel.evalPtr(dim, &self.x[i, 0], &self.y[j, 0]) + val2 = w*self.kernel.evalPtr(dim, &self.y[j, 0], &self.x[i, 0]) + k = 0 + for I in range(2*dofs_per_element): + if I < dofs_per_element: + PHI_I_0 = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + PHI_I_1 = 0. + else: + PHI_I_0 = 0. 
cdef void addQuadRule_boundary(self, panelType panel):
    # Build and register the quadrature rule used for distant boundary
    # (element x face) interactions of quadrature order ``panel``.
    #
    # The rule is the tensor product of a Xiao-Gimbutas rule of dimension
    # ``self.dim`` (rule1) and a Duffy-transformed rule of dimension
    # ``self.dim-1`` (rule2).  The shape functions are tabulated at the
    # rule1 nodes and replicated across the rule2 nodes so that
    # PHI[i, j*num_nodes2+k] is indexed like the tensor-product weights.
    #
    # Side effects: stores the rule in self.distantQuadRules[panel] /
    # self.distantQuadRulesPtr[panel] and grows the scratch buffers
    # self.x, self.y and self.temp if the new rule needs more nodes.
    cdef:
        simplexQuadratureRule qr0, qr1
        doubleSimplexQuadratureRule qr2
        specialQuadRule sQR
        REAL_t[:, ::1] PHI
        INDEX_t i, j, k
        INDEX_t numNodes1, numNodes2
        INDEX_t dofs_per_element = self.DoFMap.dofs_per_element
        REAL_t phi
        shapeFunction sf
    qr0 = simplexXiaoGimbutas(panel, self.dim)
    qr1 = simplexDuffyTransformation(panel, self.dim, self.dim-1)
    qr2 = doubleSimplexQuadratureRule(qr0, qr1)
    numNodes1 = qr2.rule1.num_nodes
    numNodes2 = qr2.rule2.num_nodes
    PHI = uninitialized((dofs_per_element, qr2.num_nodes), dtype=REAL)
    for i in range(dofs_per_element):
        sf = self.getLocalShapeFunction(i)
        for j in range(numNodes1):
            # The value depends only on the rule1 node j, so evaluate the
            # shape function once instead of once per rule2 node.
            phi = sf(qr2.rule1.nodes[:, j])
            for k in range(numNodes2):
                PHI[i, j*numNodes2+k] = phi
    sQR = specialQuadRule(qr2, PHI=PHI)
    self.distantQuadRules[panel] = sQR
    # NOTE(review): in this copy of the file the right-hand side carries no
    # explicit pointer cast; a cast may have been lost in extraction --
    # confirm against the original source.
    self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel])

    # Scratch buffers only ever grow; existing larger buffers are reused.
    if numNodes1 > self.x.shape[0]:
        self.x = uninitialized((numNodes1, self.dim), dtype=REAL)
    if numNodes2 > self.y.shape[0]:
        self.y = uninitialized((numNodes2, self.dim), dtype=REAL)
    if qr2.num_nodes > self.temp.shape[0]:
        self.temp = uninitialized((qr2.num_nodes), dtype=REAL)
simplex1.shape[1] + REAL_t normW, nw + + # Kernel: + # \Gamma(x,y) = n \dot (x-y) * C(d,s) / (2s) / |x-y|^{d+2s} + # with inward normal n. + # + # Rewrite as + # \Gamma(x,y) = [ n \dot (x-y)/|x-y| ] * [ C(d,s) / (2s) / |x-y|^{d-1+2s} ] + # \--------------------------------/ + # | + # boundaryKernel + # + # n is independent of x and y + if dim == 2: + self.n[0] = simplex2[1, 1] - simplex2[0, 1] + self.n[1] = simplex2[0, 0] - simplex2[1, 0] + # F is same as vol2 + valReal = 1./sqrt(mydot(self.n, self.n)) + self.n[0] *= valReal + self.n[1] *= valReal + elif dim == 3: + for j in range(dim): + self.x[0, j] = simplex2[1, j]-simplex2[0, j] + for j in range(dim): + self.x[1, j] = simplex2[2, j]-simplex2[0, j] + self.n[0] = self.x[0, 1]*self.x[1, 2]-self.x[0, 2]*self.x[1, 1] + self.n[1] = self.x[0, 2]*self.x[1, 0]-self.x[0, 0]*self.x[1, 2] + self.n[2] = self.x[0, 0]*self.x[1, 1]-self.x[0, 1]*self.x[1, 0] + valReal = 1./sqrt(mydot(self.n, self.n)) + self.n[0] *= valReal + self.n[1] *= valReal + self.n[2] *= valReal + + contrib[:] = 0. + + vol = vol1*vol2 + if panel < 0: + sQR = (self.distantQuadRulesPtr[MAX_PANEL+panel]) + else: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PHI = sQR.PHI + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + + for k in range(qr2.rule1.num_nodes): + for m in range(qr2.rule2.num_nodes): + if dim == 1: + nw = 1. + else: + normW = 0. + for j in range(dim): + self.w[j] = self.y[m, j]-self.x[k, j] + normW += self.w[j]**2 + normW = 1./sqrt(normW) + for j in range(dim): + self.w[j] *= normW + nw = mydot(self.n, self.w) + i = k*qr2.rule2.num_nodes+m + self.temp[i] = qr2.weights[i] * nw * self.kernel.evalPtr(dim, &self.x[k, 0], &self.y[m, 0]) + + k = 0 + for I in range(self.DoFMap.dofs_per_element): + for J in range(I, self.DoFMap.dofs_per_element): + if mask[k]: + val = 0. 
# Declaration of the builder that assembles nonlocal operators for a given
# DoFMap and kernel.  Only attribute and method signatures live here; the
# bodies are in the matching implementation template.
cdef class {SCALAR_label}nonlocalBuilder:
    cdef:
        # Mesh of ``dm`` (the constructor sets it from ``dm.mesh``).
        meshBase mesh
        # Primary DoFMap.
        public DoFMap dm
        # Optional second DoFMap; when set, its mesh is handed to
        # ``local_matrix.setMesh2`` by the constructor.
        public DoFMap dm2
        public {SCALAR_label}Kernel kernel
        # Local matrix for the volume (element x element) integral.
        public {SCALAR_label}double_local_matrix_t local_matrix
        # Local matrices for the surface integrals: built by
        # getLocalMatrixBoundaryZeroExterior with infHorizon=True and
        # infHorizon=False respectively.
        public {SCALAR_label}double_local_matrix_t local_matrix_zeroExterior
        public {SCALAR_label}double_local_matrix_t local_matrix_surface
        # Forced to False by the constructor when the kernel has a finite horizon.
        BOOL_t zeroExterior
        # Scratch buffers for local contributions of the volume and of the
        # surface terms; sized in the constructor from dofs_per_element.
        {SCALAR}_t[::1] contrib, contribZeroExterior
        # Cache behind the ``d2c`` property (filled from dm.getPatchLookup()).
        list _d2c
        public MPI.Comm comm
        # Defaults to a FakePLogger() when the caller supplies none.
        public FakePLogger PLogger
        public dict params
    # Factory helpers selecting the local-matrix implementation from ``params``.
    cdef inline {SCALAR_label}double_local_matrix_t getLocalMatrix(self, dict params)
    cdef inline {SCALAR_label}double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon)
    # NOTE(review): the bodies of the three methods below are not part of
    # this excerpt; see the implementation file for their contracts.
    cpdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J)
    cpdef {SCALAR}_t getEntryCluster(self, INDEX_t I, INDEX_t J)
    cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=*, LinearOperator Anear=*, dict jumps=*, str prefix=*, tree_node myRoot=*, BOOL_t doDistributedAssembly=*)
def __init__(self, DoFMap dm, {SCALAR_label}LinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None):
    # Set up the index manager for ``dm``.
    #
    # dm                      DoFMap whose cell -> DoF lookup is used
    # A                       operator receiving matrix entries (may be None)
    # cellPairIdentifierSize  length of the integer tuple stored in ``hv``
    # myDofs                  optional index set; getDoFsElem masks DoFs
    #                         outside of it with -1
    # sP                      sparsity pattern receiving index pairs (may be None)
    #
    # Besides storing the arguments this tabulates ``idxCellFlip``: row r
    # maps every vertex DoF of an element to its position under the r-th
    # vertex renumbering (2 rows in 1D, 3 in 2D, 12 in 3D).
    # Raises NotImplementedError for any other manifold dimension.
    cdef:
        INDEX_t[:, ::1] idxCellFlip
        INDEX_t j, offset, row, vertex, target
        INDEX_t dofsPerVertex, dofsPerCell, dofsPerElement, manifoldDim
        tuple vertexPerms
    self.dm = dm
    self.myDofs = myDofs
    dofsPerElement = self.dm.dofs_per_element
    # Two elements interact, hence room for twice the element DoFs.
    self.localDoFs = uninitialized((2*dofsPerElement), dtype=INDEX)
    self.permutedDoFsLocal = uninitialized((2*dofsPerElement), dtype=INDEX)
    self.hv = intTuple.create(uninitialized(cellPairIdentifierSize, dtype=INDEX))
    self.A = A
    self.sP = sP

    # vertexPerms[r][v] is the vertex that local vertex v is sent to by
    # renumbering r.
    manifoldDim = self.dm.mesh.manifold_dim
    if manifoldDim == 1:
        vertexPerms = ((0, 1),
                       (1, 0))
    elif manifoldDim == 2:
        vertexPerms = ((0, 1, 2),
                       (1, 2, 0),
                       (2, 0, 1))
    elif manifoldDim == 3:
        vertexPerms = ((0, 1, 2, 3),
                       (1, 2, 0, 3),
                       (2, 0, 1, 3),
                       (1, 3, 2, 0),
                       (3, 0, 2, 1),
                       (2, 1, 3, 0),
                       (3, 1, 0, 2),
                       (0, 2, 3, 1),
                       (0, 3, 1, 2),
                       (1, 2, 3, 0),
                       (2, 3, 0, 1),
                       (3, 0, 1, 2))
    else:
        raise NotImplementedError()

    dofsPerVertex = self.dm.dofs_per_vertex
    idxCellFlip = uninitialized((len(vertexPerms), dofsPerElement), dtype=INDEX)
    for row in range(len(vertexPerms)):
        for vertex in range(manifoldDim+1):
            target = vertexPerms[row][vertex]
            for j in range(dofsPerVertex):
                idxCellFlip[row, vertex*dofsPerVertex+j] = target*dofsPerVertex+j

    if manifoldDim == 1:
        # Interior DoFs of an interval: kept in place by the identity,
        # reversed by the flip.
        dofsPerCell = self.dm.dofs_per_cell
        offset = 2*dofsPerVertex
        for j in range(dofsPerCell):
            idxCellFlip[0, offset+j] = offset+j
            idxCellFlip[1, offset+dofsPerCell-1-j] = offset+j
    # NOTE(review): in 2D and 3D only the vertex-DoF columns are written;
    # any further columns of the table stay uninitialized.

    self.idxCellFlip = idxCellFlip
    self.cache = {}
cdef inline void addToSparsityElemElemSym(self):
    # Record the symmetric coupling pattern of the current element pair in
    # the sparsity pattern ``self.sP``.
    #
    # For every pair of local DoFs (p, q), q >= p, taken from the combined
    # DoF list of both elements (``self.localDoFs``), the entries (I, J) and
    # (J, I) are added.  Negative DoF indices are skipped.
    #
    # No local matrix is involved here, so no running index into a packed
    # contribution array is needed.
    cdef:
        INDEX_t p, q, I, J
        INDEX_t numDoFs = 2*self.dm.dofs_per_element
    for p in range(numDoFs):
        I = self.localDoFs[p]
        if I < 0:
            continue
        self.sP.add(I, I)
        for q in range(p+1, numDoFs):
            J = self.localDoFs[q]
            if J >= 0:
                self.sP.add(I, J)
                self.sP.add(J, I)
cdef tupleDictMASK buildMasksForClusters(self, list clusterList, bint useSymmetricCells, INDEX_t *startCluster):
    # Build, for the near-field cluster pairs in ``clusterList``, a lookup
    # (cellNo1, cellNo2) -> MASK_t telling which entries of the local
    # element-pair matrix belong to the cluster pair.
    #
    # clusterList        list of nearFieldClusterPair; must be non-empty
    #                    because element 0 is read unconditionally below
    # useSymmetricCells  if True, cell pairs are stored with the smaller
    #                    cell number first and the packed symmetric mask
    #                    layout (getElemElemSymMask) is used; otherwise all
    #                    ordered pairs of the cell union get a full mask
    #                    (getElemElemMask)
    # startCluster       in/out: index of the first cluster to process.  It
    #                    is advanced once per processed cluster, and the loop
    #                    stops early once more than 10000000 masks are
    #                    stored, so a caller can resume from the returned
    #                    position.
    #
    # Returns the tupleDictMASK holding the masks.
    cdef:
        nearFieldClusterPair cluster = clusterList[0]
        MASK_t cellMask1, cellMask2
        indexSet cellsUnion = cluster.cellsUnion
        indexSetIterator it = cellsUnion.getIter(), it2 = cellsUnion.getIter()
        indexSet clusterDofs1, clusterDofs2
        INDEX_t cellNo1 = -1, cellNo2 = -1
        INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX)
        INDEX_t[::1] cellPair2 = uninitialized((2), dtype=INDEX)
        tupleDictMASK masks = tupleDictMASK(self.dm.mesh.num_cells, deleteHits=False, logicalAndHits=True, length_inc=20)
        INDEX_t p, I
        # dict cellMasks1, cellMasks2
        MASK_t mask, mask1, mask2, cellMask11, cellMask12, cellMask21, cellMask22, k
        INDEX_t dofs_per_element = self.dm.dofs_per_element
        map[INDEX_t, MASK_t] cellMasks1
        map[INDEX_t, MASK_t] cellMasks2

    cellMask1.reset()
    cellMask2.reset()
    for cluster in clusterList[startCluster[0]:]:
        startCluster[0] += 1
        cellsUnion = cluster.cellsUnion
        # cellMasks1 = {}
        # cellMasks2 = {}
        clusterDofs1 = cluster.n1.get_dofs()
        clusterDofs2 = cluster.n2.get_dofs()

        it.setIndexSet(cellsUnion)

        # Pass 1: per cell, one bit per local DoF saying whether that DoF
        # belongs to cluster node n1 (mask1) resp. n2 (mask2).
        # NOTE: the maps are not cleared between clusters; entries are
        # overwritten for every cell of the current union.
        while it.step():
            cellNo1 = it.i
            mask1.reset()
            mask2.reset()
            k = 1
            for p in range(dofs_per_element):
                I = self.dm.cell2dof(cellNo1, p)
                if I >= 0:
                    if clusterDofs1.inSet(I):
                        mask1 |= k
                    if clusterDofs2.inSet(I):
                        mask2 |= k
                k = k << 1
            cellMasks1[cellNo1] = mask1
            cellMasks2[cellNo1] = mask2

        # Pass 2: combine the per-cell masks of both cells of a pair.  Bits
        # of the second cell are shifted up by dofs_per_element, matching
        # the combined DoF numbering of an element pair.
        if not useSymmetricCells:
            # TODO: Think some more about this branch, maybe this can be improved.
            it.reset()
            it2.setIndexSet(cellsUnion)
            # it.setIndexSet(cluster.n1.cells)
            # it2.setIndexSet(cluster.n2.cells)
            while it.step():
                cellNo1 = it.i
                cellPair[0] = cellNo1
                cellMask11 = cellMasks1[cellNo1]
                cellMask12 = cellMasks2[cellNo1]
                it2.reset()
                while it2.step():
                    cellNo2 = it2.i
                    cellMask21 = cellMasks1[cellNo2]
                    cellMask22 = cellMasks2[cellNo2]
                    cellMask1 = cellMask11 | (cellMask21 << dofs_per_element)
                    cellMask2 = cellMask12 | (cellMask22 << dofs_per_element)
                    # Nothing to assemble if either side has no DoF in the pair.
                    if (cellMask1.none()) or (cellMask2.none()):
                        continue
                    cellPair[1] = cellNo2
                    mask = self.getElemElemMask(cellMask1, cellMask2)
                    # does a logical "and" if there already is an entry
                    masks.enterValue(cellPair, mask)
        else:
            it.setIndexSet(cluster.n1.cells)
            it2.setIndexSet(cluster.n2.cells)
            while it.step():
                cellNo1 = it.i
                cellPair[0] = cellNo1
                cellPair2[1] = cellNo1
                cellMask11 = cellMasks1[cellNo1]
                cellMask12 = cellMasks2[cellNo1]
                it2.reset()
                while it2.step():
                    cellNo2 = it2.i
                    cellMask21 = cellMasks1[cellNo2]
                    cellMask22 = cellMasks2[cellNo2]
                    if cellNo1 > cellNo2:
                        # Store the pair as (smaller, larger): swap the
                        # roles of the two cells in masks and key.
                        cellMask1 = cellMask21 | (cellMask11 << dofs_per_element)
                        cellMask2 = cellMask22 | (cellMask12 << dofs_per_element)
                        if (cellMask1.none()) or (cellMask2.none()):
                            continue
                        cellPair2[0] = cellNo2
                        mask = self.getElemElemSymMask(cellMask1, cellMask2)
                        # does a logical "and" if there already is an entry
                        masks.enterValue(cellPair2, mask)
                    else:
                        cellMask1 = cellMask11 | (cellMask21 << dofs_per_element)
                        cellMask2 = cellMask12 | (cellMask22 << dofs_per_element)
                        if (cellMask1.none()) or (cellMask2.none()):
                            continue
                        cellPair[1] = cellNo2
                        mask = self.getElemElemSymMask(cellMask1, cellMask2)
                        # does a logical "and" if there already is an entry
                        masks.enterValue(cellPair, mask)

        # Bound the size of one batch; the caller continues from startCluster[0].
        if masks.nnz > 10000000:
            break

    return masks
cdef inline MASK_t getElemElemMask(self, MASK_t mask_dofs1, MASK_t mask_dofs2):
    # Build the entry mask of a full (non-symmetric) element-pair matrix.
    #
    # The local matrix is stored row-major with 2*dofs_per_element rows and
    # columns.  Bit p*(2*dofs_per_element)+q of the result is set iff bit p
    # of ``mask_dofs1`` and bit q of ``mask_dofs2`` are set.
    cdef:
        INDEX_t row, col
        INDEX_t numDoFs = 2*self.dm.dofs_per_element
        MASK_t bit = 1
        MASK_t result
    result.reset()
    for row in range(numDoFs):
        if not mask_dofs1[row]:
            # Row not selected: skip over all of its columns at once.
            bit = bit << numDoFs
            continue
        for col in range(numDoFs):
            if mask_dofs2[col]:
                result |= bit
            bit = bit << 1
    return result
cdef void addToCache(self, {SCALAR}_t[::1] contrib, INDEX_t[::1] ID, INDEX_t perm, BOOL_t inv=False):
    # Store a permuted copy of the local contribution ``contrib`` in
    # ``self.cache`` under the key built from ``ID``.
    #
    # contrib  packed local matrix of the current element pair
    # ID       integer identifier of the element pair configuration
    # perm     encoded permutation, decoded by ``permute``
    # inv      forwarded to ``permute``
    cdef:
        intTuple hv = intTuple.create(ID)
    # The copy must have the scalar type of ``contrib``: ``permute`` takes a
    # {SCALAR}_t buffer, so a REAL buffer is wrong whenever {SCALAR} is not
    # REAL.
    contribNew = uninitialized((contrib.shape[0]), dtype={SCALAR})
    self.permute(contrib, contribNew, perm, inv)
    self.cache[hv] = contribNew
cdef inline {SCALAR}_t extractElemSymMasked{SCALAR_label}(DoFMap DoFMap, const {SCALAR}_t[::1] contrib, REAL_t fac, MASK_t mask):
    # Return the sum of ``fac*contrib[k]`` over all entries k of a packed
    # symmetric single-element matrix whose bit is set in ``mask``.
    #
    # The packed upper triangle of a dofs_per_element x dofs_per_element
    # matrix has n*(n+1)/2 entries, visited here in storage order.
    cdef:
        INDEX_t idx
        INDEX_t n = DoFMap.dofs_per_element
        INDEX_t numEntries = (n*(n+1)) // 2
        {SCALAR}_t total = 0.
    for idx in range(numEntries):
        if mask[idx]:
            total += fac*contrib[idx]
    return total
def __init__(self, DoFMap dm, {SCALAR_label}VectorLinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None):
    # Index manager for vector-valued operators.
    #
    # dm, cellPairIdentifierSize, myDofs, sP are forwarded to the scalar
    # index manager; its scalar operator slot is left empty (None).
    # A is the vector-valued operator receiving entries.  It may be None
    # (the declared default), e.g. when only a sparsity pattern is built.
    super({SCALAR_label}IndexManagerVector, self).__init__(dm, None, cellPairIdentifierSize, myDofs, sP)
    self.vecA = A
    # Guard the default: reading an attribute through a None reference of
    # extension type is not safe.  Without an operator there are no vector
    # components to scale or add.
    if A is not None:
        self.vectorSize = A.vectorSize
    else:
        self.vectorSize = 0
def __init__(self,
             meshBase mesh,
             DoFMap dm,
             {SCALAR_label}Kernel kernel,
             dict params=None,
             bint zeroExterior=True,
             MPI.Comm comm=None,
             FakePLogger PLogger=None,
             DoFMap dm2=None,
             **kwargs):
    # Set up a builder for the nonlocal operator defined by ``kernel`` on
    # the DoFMap ``dm``.
    #
    # mesh          must be the mesh of ``dm``
    # dm            DoFMap of the discretization
    # kernel        kernel; its horizon must be a ``constant`` and its
    #               dimension must match the mesh dimension
    # params        assembly options (e.g. 'target_order', 'quadType',
    #               'quadTypeBoundary', 'forceNonSym'); a fresh dict is used
    #               when omitted
    # zeroExterior  ignored (treated as False) for finite-horizon kernels
    # comm          optional MPI communicator
    # PLogger       optional logger; a FakePLogger is created otherwise
    # dm2           optional second DoFMap of the same type on the same mesh
    # kwargs        only the deprecated alias ``boundary`` (for
    #               ``zeroExterior``) is interpreted
    #
    # Raises AssertionError if one of the consistency checks fails.
    cdef:
        MASK_t mask
    # The dict is stored on the instance and written to later (getSparse
    # sets 'minClusterSize').  A shared ``{}`` default would therefore leak
    # settings from one builder into the next.
    if params is None:
        params = {}
    if 'boundary' in kwargs:
        warnings.warn('"boundary" parameter deprecated', DeprecationWarning)
        zeroExterior = kwargs['boundary']

    self.dm = dm
    self.mesh = self.dm.mesh
    assert self.dm.mesh == mesh
    if dm2 is not None:
        self.dm2 = dm2
        assert type(self.dm) == type(self.dm2)
        assert self.dm.mesh == self.dm2.mesh
    self.kernel = kernel
    if self.kernel.finiteHorizon:
        self.zeroExterior = False
    else:
        self.zeroExterior = zeroExterior
    self.comm = comm
    self.params = params

    assert isinstance(self.kernel.horizon, constant), "Need horizon to be constant."
    assert kernel.dim == mesh.dim, "Kernel dimension must match mesh dimension"
    assert kernel.dim == dm.mesh.dim, "Kernel dimension must match dm.mesh dimension"

    # volume integral
    self.local_matrix = self.getLocalMatrix(params)

    # Two elements interact, so the local matrix has 2*dofs_per_element
    # rows; symmetric local matrices are stored as packed upper triangle.
    if self.local_matrix.symmetricLocalMatrix:
        self.contrib = uninitialized(((2*self.dm.dofs_per_element)*(2*self.dm.dofs_per_element+1)//2), dtype={SCALAR})
    else:
        self.contrib = uninitialized(((2*self.dm.dofs_per_element)**2), dtype={SCALAR})
    # Masks carry one bit per local matrix entry.
    assert self.contrib.shape[0] <= mask.size(), "Mask type size = {} is not large enough for {} entries. Please set a larger size and recompile.".format(mask.size(), self.contrib.shape[0])

    self.local_matrix.setMesh1(self.dm.mesh)
    if self.dm2 is None:
        self.local_matrix.setMesh2(self.dm.mesh)
    else:
        self.local_matrix.setMesh2(self.dm2.mesh)

    LOGGER.debug(self.local_matrix)

    # surface integrals
    self.local_matrix_zeroExterior = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=True)
    self.local_matrix_surface = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=False)

    if self.local_matrix_zeroExterior is not None:
        self.local_matrix_zeroExterior.setMesh1(self.dm.mesh)
        self.local_matrix_surface.setMesh1(self.dm.mesh)
        if self.local_matrix_zeroExterior.symmetricLocalMatrix:
            self.contribZeroExterior = uninitialized((self.dm.dofs_per_element*(self.dm.dofs_per_element+1)//2), dtype={SCALAR})
        else:
            self.contribZeroExterior = uninitialized(((self.dm.dofs_per_element)**2), dtype={SCALAR})
        LOGGER.debug(self.local_matrix_zeroExterior)
        LOGGER.debug(self.local_matrix_surface)
    else:
        self.contribZeroExterior = uninitialized((0), dtype={SCALAR})

    if PLogger is not None:
        self.PLogger = PLogger
    else:
        self.PLogger = FakePLogger()
cdef inline {SCALAR_label}double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon):
    # Create the local matrix for the surface (boundary) integral.
    #
    # params       assembly options; 'quadTypeBoundary' takes precedence
    #              over 'quadType' (default 'classical-refactored'), and
    #              'target_order' is forwarded to the local matrix
    # infHorizon   if True, the boundary kernel is derived from a copy of
    #              the kernel with infinite horizon, otherwise from the
    #              kernel itself
    #
    # Returns the local matrix, or None for a non-fractional kernel with an
    # unsupported quadrature type.
    # Raises NotImplementedError for manifold dimensions other than 1 and 2,
    # for Product_DoFMap in 2D, and for a fractional kernel with an
    # unsupported quadrature type.
    cdef:
        fractionalOrderBase s
        BOOL_t isFractional = isinstance(self.kernel, FractionalKernel)
    target_order = params.get('target_order', None)
    quadType = params.get('quadTypeBoundary',
                          params.get('quadType', 'classical-refactored'))
    # The trailing comma matters: without it the right-hand side is a plain
    # string and ``in`` degrades to a substring test (e.g. '' or
    # 'classical' would be accepted).
    assert quadType in (
        'classical-refactored',
    )

    if isFractional:
        s = self.kernel.s
        # The fractional order must stay on one side of 1 everywhere.
        assert ((s.min < 1.) and (s.max < 1.)) or ((s.min > 1.) and (s.max > 1.))
    assert isinstance(self.kernel.horizon, constant)
    if infHorizon:
        kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf))
    else:
        kernelInfHorizon = self.kernel

    if quadType != 'classical-refactored':
        # Kept for quadrature types that may be added to the list above.
        if isFractional:
            raise NotImplementedError()
        return None

    kernelBoundary = kernelInfHorizon.getBoundaryKernel()
    if self.mesh.manifold_dim == 1:
        return fractionalLaplacian1D_boundary(kernelBoundary,
                                              mesh=self.mesh,
                                              DoFMap=self.dm,
                                              target_order=target_order)
    if self.mesh.manifold_dim == 2 and not isinstance(self.dm, Product_DoFMap):
        return fractionalLaplacian2D_boundary(kernelBoundary,
                                              mesh=self.mesh,
                                              DoFMap=self.dm,
                                              target_order=target_order)
    raise NotImplementedError()
LinearOperator A = None + BOOL_t useSymmetricMatrix + REAL_t[:, :, ::1] boxes = None + sparseGraph cells = None + REAL_t[:, ::1] coords = None + tree_node root, myRoot + list Pnear + nearFieldClusterPair cP + DoFMap treeDM + arrayIndexSet oldDoFs + indexSetIterator it + tree_node n + indexSetIterator cellIt1, cellIt2 + set newDoFs + INDEX_t dof_tree, dof, new_dof + INDEX_t[::1] translate + arrayIndexSet cells1, cells2 + sparsityPattern processedCellPairs + + if self.dm.mesh.dim == 1: + fac = 0.125 + else: + fac = 1. + self.params['minClusterSize'] = self.params.get('minClusterSize', int(fac*(self.kernel.horizonValue/self.dm.mesh.h)**self.dm.mesh.dim)) + refParams = self.getH2RefinementParams() + doDistributedAssembly = self.comm is not None and self.comm.size > 1 and self.dm.num_dofs > self.comm.size + forceUnsymmetric = self.params.get('forceUnsymmetric', doDistributedAssembly) + assembleOnRoot = self.params.get('assembleOnRoot', False) + localFarFieldIndexing = True + localFarFieldIndexing = doDistributedAssembly and not assembleOnRoot and localFarFieldIndexing + if doDistributedAssembly and not assembleOnRoot: + assert forceUnsymmetric + + # We want to capture all element x element interactions. + # We set up a temporary dofmap and construct a near field wrt that. 
+ treeDM = dofmapFactory('P1', self.dm.mesh, -1) + with self.PLogger.Timer(prefix+'boxes, cells, coords'): + boxes, cells = getDoFBoxesAndCells(treeDM.mesh, treeDM, self.comm) + coords = treeDM.getDoFCoordinates() + + # construct the cluster tree + root, myRoot, _, doDistributedAssembly = self.getTree(doDistributedAssembly, refParams, boxes, cells, coords, allNearField=True, dm=treeDM) + + # get the covering cluster pairs + Pnear = self.getCoveringClusters(root, myRoot, doDistributedAssembly, refParams, boxes, cells, coords, assembleOnRoot=assembleOnRoot, ignoreDiagonalBlocks=ignoreDiagonalBlocks) + + # translate to original dofmap + translate = -np.ones((treeDM.num_dofs), dtype=INDEX) + for cellNo in range(treeDM.mesh.num_cells): + for dofNo in range(treeDM.dofs_per_element): + dof = self.dm.cell2dof(cellNo, dofNo) + if dof >= 0: + dof_tree = treeDM.cell2dof(cellNo, dofNo) + translate[dof_tree] = dof + + for n in root.leaves(): + oldDoFs = n._dofs + newDoFs = set() + it = oldDoFs.getIter() + while it.step(): + dof_tree = it.i + new_dof = translate[dof_tree] + if new_dof >= 0: + newDoFs.add(new_dof) + + if len(newDoFs) > 0: + newDoFsArray = np.array(list(newDoFs), dtype=INDEX) + n._dofs = arrayIndexSet(newDoFsArray) + else: + n._dofs = arrayIndexSet() + for n in root.get_tree_nodes(): + n._num_dofs = -1 + + Pnear_filtered = [] + for cP in Pnear: + if (cP.n1.get_num_dofs() > 0) or (cP.n2.get_num_dofs() > 0): + Pnear_filtered.append(cP) + Pnear = Pnear_filtered + + useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells and not forceUnsymmetric + + with self.PLogger.Timer(prefix+'build near field sparsity pattern'): + if myRoot is not None and doDistributedAssembly: + A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) + else: + A = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) + + # We are not using assembleClusters because we don't want to use surface integration + with 
self.PLogger.Timer(prefix+'interior - compute'): + iM = {SCALAR_label}IndexManager(self.dm, A) + processedCellPairs = sparsityPattern(self.dm.mesh.num_cells) + + for cP in Pnear: + cells1 = cP.n1.cells + cells2 = cP.n2.cells + cellIt1 = cells1.getIter() + cellIt2 = cells2.getIter() + while cellIt1.step(): + cellNo1 = cellIt1.i + self.local_matrix.setCell1(cellNo1) + cellIt2.reset() + while cellIt2.step(): + cellNo2 = cellIt2.i + if processedCellPairs.findIndex(cellNo1, cellNo2): + continue + processedCellPairs.add(cellNo1, cellNo2) + + processedCellPairs.add(cellNo1, cellNo2) + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + else: + if symmetricCells: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) + # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 2.) + else: + iM.addToMatrixElemElem(contrib, 2.) + else: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) 
+ self.local_matrix.swapCells() + + if doDistributedAssembly and assembleOnRoot: + with self.PLogger.Timer('reduceNearOp'): + A = self.reduceNearOp(A, myRoot.get_dofs()) + if localFarFieldIndexing: + _, local_dm, lclR, lclP = self.doLocalFarFieldIndexing(myRoot, boxes) + if self.comm is None or (assembleOnRoot and self.comm.rank == 0) or (not assembleOnRoot): + if self.comm is None or (assembleOnRoot and self.comm.rank == 0): + if returnNearField: + return A, Pnear + else: + return A + else: + with self.PLogger.Timer('setup distributed op'): + if not localFarFieldIndexing: + raise NotImplementedError() + else: + dist_A = DistributedLinearOperator(A, root, Pnear, self.comm, self.dm, local_dm, lclR, lclP) + if returnNearField: + return dist_A, Pnear + else: + return dist_A + else: + if returnNearField: + return A, Pnear + else: + return A + + def getDense(self, BOOL_t trySparsification=False): + cdef: + INDEX_t cellNo1, cellNo2 + {SCALAR_label}LinearOperator A = None + {SCALAR}_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior + INDEX_t start, end + meshBase surface + {SCALAR_label}IndexManager iM + INDEX_t i, j, explicitZerosRow + np.int64_t explicitZeros + REAL_t[:, ::1] data + REAL_t sparsificationThreshold = 0.8 + BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix + BOOL_t symmetricCells = self.local_matrix.symmetricCells + MASK_t mask + + if self.comm: + start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) + end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) + else: + start = 0 + end = self.mesh.num_cells + + if (trySparsification + and (self.comm is None or self.comm.size == 1) + and not self.zeroExterior + and self.dm2 is None + and self.kernel.finiteHorizon + and (self.mesh.volume*(1.-sparsificationThreshold) > self.kernel.getHorizonValue()**self.mesh.dim)): + + with self.PLogger.Timer('build sparsity pattern'): + + sP = sparsityPattern(self.dm.num_dofs) + iM = 
{SCALAR_label}IndexManager(self.dm, None, sP=sP) + + for cellNo1 in range(start, end): + self.local_matrix.setCell1(cellNo1) + for cellNo2 in range(cellNo1, self.mesh.num_cells): + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + else: + if self.local_matrix.symmetricCells: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + else: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + self.local_matrix.swapCells() + indptr, indices = sP.freeze() + useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells + if useSymmetricMatrix: + A = {SCALAR_label}SSS_LinearOperator(indices, indptr, + np.zeros((indices.shape[0]), dtype={SCALAR}), + np.zeros((self.dm.num_dofs), dtype={SCALAR})) + ratio = ((A.nnz+A.num_rows)/REAL(A.num_rows))/REAL(A.num_columns) + else: + A = {SCALAR_label}CSR_LinearOperator(indices, indptr, + np.zeros((indices.shape[0]), dtype=REAL)) + ratio = (A.nnz/REAL(A.num_rows))/REAL(A.num_columns) + LOGGER.warning('Assembling into sparse{} matrix, since {}% of entries are zero.'.format(', symmetric' if useSymmetricMatrix else '', + 100.*(1.-ratio))) + trySparsification = False + else: + if self.dm2 is None: + A = {SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs), dtype={SCALAR})) + else: + A = 
{SCALAR_label}Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs), dtype={SCALAR})) + + if self.dm2 is None: + iM = {SCALAR_label}IndexManager(self.dm, A) + else: + LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') + dmCombined = self.dm.combine(self.dm2) + B = SubMatrixAssemblyOperator(A, + np.arange(self.dm.num_dofs, dtype=INDEX), + np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) + iM = {SCALAR_label}IndexManager(dmCombined, B) + + # Omega x Omega + with self.PLogger.Timer('interior'): + for cellNo1 in range(start, end): + self.local_matrix.setCell1(cellNo1) + for cellNo2 in range(cellNo1, self.mesh.num_cells): + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + else: + if symmetricCells: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) + # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 2.) + else: + iM.addToMatrixElemElem(contrib, 2.) + else: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) 
+ self.local_matrix.swapCells() + + # Omega x Omega^C + if self.zeroExterior: + with self.PLogger.Timer('zeroExterior'): + surface = self.mesh.get_surface_mesh() + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(start, end): + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + self.local_matrix_zeroExterior.setCell1(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) + # if local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) + # else: + # raise NotImplementedError() + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, A.data) + if trySparsification: + explicitZeros = 0 + data = A.data + nr = A.num_rows + for i in range(A.num_rows): + explicitZerosRow = 0 + for j in range(A.num_columns): + if data[i, j] == 0.: + explicitZerosRow += 1 + explicitZeros += explicitZerosRow + if not (explicitZerosRow > sparsificationThreshold*A.num_columns): + nr = i+1 + break + ratio = (explicitZeros/REAL(nr))/REAL(A.num_columns) + if ratio > sparsificationThreshold: + LOGGER.warning('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*ratio)) + return CSR_LinearOperator.from_dense(A) + else: + LOGGER.warning('Not converting dense to sparse matrix, since only {}% of entries are zero.'.format(100.*ratio)) + return A + + def getDenseVector(self, BOOL_t trySparsification=False): + cdef: + INDEX_t cellNo1, cellNo2 + VectorLinearOperator A = None + {SCALAR}_t[:, ::1] contrib = self.contribVector, contribZeroExterior = self.contribZeroExteriorVector + INDEX_t start, end + meshBase surface + {SCALAR_label}IndexManagerVector iM + INDEX_t i, j, explicitZerosRow + np.int64_t explicitZeros + REAL_t[:, ::1] data + REAL_t sparsificationThreshold = 0.8 + BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix + 
BOOL_t symmetricCells = self.local_matrix.symmetricCells + MASK_t mask + + if self.comm: + start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) + end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) + else: + start = 0 + end = self.mesh.num_cells + + + if self.dm2 is None: + A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs, self.kernel.vectorSize), dtype=REAL)) + else: + A = Dense_VectorLinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs, self.kernel.vectorSize), dtype=REAL)) + + if self.dm2 is None: + iM = {SCALAR_label}IndexManagerVector(self.dm, A) + else: + LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') + dmCombined = self.dm.combine(self.dm2) + B = SubMatrixAssemblyOperator(A, + np.arange(self.dm.num_dofs, dtype=INDEX), + np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) + iM = {SCALAR_label}IndexManagerVector(dmCombined, B) + + # Omega x Omega + with self.PLogger.Timer('interior'): + for cellNo1 in range(start, end): + self.local_matrix.setCell1(cellNo1) + for cellNo2 in range(cellNo1, self.mesh.num_cells): + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + self.local_matrix.evalVector(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSymVector(contrib, 1.) + else: + iM.addToMatrixElemElemVector(contrib, 1.) + else: + if symmetricCells: + if panel != IGNORED: + self.local_matrix.evalVector(contrib, panel) + # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) + # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). + if symmetricLocalMatrix: + iM.addToMatrixElemElemSymVector(contrib, 2.) + else: + iM.addToMatrixElemElemVector(contrib, 2.) 
+ else: + if panel != IGNORED: + self.local_matrix.evalVector(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSymVector(contrib, 1.) + else: + iM.addToMatrixElemElemVector(contrib, 1.) + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + self.local_matrix.evalVector(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSymVector(contrib, 1.) + else: + iM.addToMatrixElemElemVector(contrib, 1.) + self.local_matrix.swapCells() + + # Omega x Omega^C + if self.zeroExterior: + with self.PLogger.Timer('zeroExterior'): + surface = self.mesh.get_surface_mesh() + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(start, end): + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + self.local_matrix_zeroExterior.setCell1(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.evalVector(contribZeroExterior, panel, mask) + # if local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSymVector(contribZeroExterior, 1.) 
+ # else: + # raise NotImplementedError() + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, A.data) + return A + + cpdef {SCALAR}_t getEntryCluster(self, INDEX_t I, INDEX_t J): + cdef: + tree_node n1, n2, n3 + list clusters = [] + nearFieldClusterPair c1, c2, c3 + arrayIndexSet aI1, aI2, aI3 + REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) + INDEX_t[::1] I_view = np.array([I], dtype=INDEX) + INDEX_t[::1] J_view = np.array([J], dtype=INDEX) + arrayIndexSetIterator it = arrayIndexSetIterator() + list d2c = self.d2c + LinearOperator A + REAL_t[:, ::1] mat = np.zeros((1, 1), dtype=REAL) + if I == J: + aI3 = arrayIndexSet(I_view) + n3 = tree_node(None, aI3, fake_boxes) + + cells = set() + it.setIndexSet(aI3) + while it.step(): + cells |= d2c[it.i] + n3._cells = arrayIndexSet() + n3._cells.fromSet(cells) + + c3 = nearFieldClusterPair(n3, n3) + c3.set_cells() + clusters.append(c3) + else: + aI1 = arrayIndexSet(I_view) + aI2 = arrayIndexSet(J_view) + n1 = tree_node(None, aI1, fake_boxes) + n2 = tree_node(None, aI2, fake_boxes) + + cells = set() + it.setIndexSet(aI1) + while it.step(): + cells |= d2c[it.i] + n1._cells = arrayIndexSet() + n1._cells.fromSet(cells) + + cells = set() + it.setIndexSet(aI2) + while it.step(): + cells |= d2c[it.i] + n2._cells = arrayIndexSet() + n2._cells.fromSet(cells) + + c1 = nearFieldClusterPair(n1, n2) + c1.set_cells() + c2 = nearFieldClusterPair(n2, n1) + c2.set_cells() + clusters.append(c1) + clusters.append(c2) + A = Dense_SubBlock_LinearOperator(I_view, + J_view, + self.dm.num_dofs, + self.dm.num_dofs, + mat) + self.assembleClusters(clusters, Anear=A) + return mat[0, 0] + + cpdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J): + cdef: + INDEX_t cellNo1, cellNo2 + INDEX_t[:,::1] surface_cells + MASK_t mask + indexSet cellsUnion = arrayIndexSet() + indexSet cellsInter = arrayIndexSet() + indexSetIterator it1 = arrayIndexSetIterator() + indexSetIterator it2 = arrayIndexSetIterator() + dm = self.dm + {SCALAR}_t entry = 
0. + cellsUnion.fromSet(self.d2c[I] | self.d2c[J]) + cellsInter.fromSet(self.d2c[I] & self.d2c[J]) + + assert isinstance(self.kernel.horizon, constant) and self.kernel.horizon.value == np.inf + + # (supp phi_I \cup supp phi_J)^2 + it1.setIndexSet(cellsUnion) + it2.setIndexSet(cellsUnion) + while it1.step(): + cellNo1 = it1.i + self.local_matrix.setCell1(cellNo1) + it2.reset() + while it2.step(): + cellNo2 = it2.i + if cellNo2 < cellNo1: + continue + mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) + if mask.none(): + continue + self.local_matrix.setCell2(cellNo2) + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + self.local_matrix.eval(self.contrib, panel, mask) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked{SCALAR_label}(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + else: + if self.local_matrix.symmetricCells: + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + # multiply by 2 to account for the contribution from cells (cellNo2, cellNo1) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked{SCALAR_label}(dm, self.contrib, 2., mask) + else: + raise NotImplementedError() + else: + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + # multiply by 2 to account for the contribution from cells (cellNo2, cellNo1) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked{SCALAR_label}(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + self.local_matrix.swapCells() + mask = getElemElemSymMask(dm, cellNo2, cellNo1, I, J) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked{SCALAR_label}(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + # (supp phi_I \cup supp phi_J) x (supp phi_I \cup supp phi_J)^C + if not 
self.kernel.variable: + if self.zeroExterior: + # zeroExterior of (supp phi_I \cup supp phi_J) + if self.mesh.dim == 1: + surface_cells = boundaryVertices(self.mesh.cells, cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(self.mesh.cells, cellsUnion) + else: + raise NotImplementedError() + + self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) + + it1.setIndexSet(cellsInter) + while it1.step(): + cellNo1 = it1.i + self.local_matrix_zeroExterior.setCell1(cellNo1) + mask = getElemSymMask(dm, cellNo1, I, J) + for cellNo2 in range(surface_cells.shape[0]): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel) + entry += extractElemSymMasked{SCALAR_label}(dm, self.contribZeroExterior, 1., mask) + else: + # (supp phi_I \cup supp phi_J) x (Omega \ (supp phi_I \cup supp phi_J)) + # TODO: This can be done using surface integrals instead + it1.setIndexSet(cellsUnion) + while it1.step(): + cellNo1 = it1.i + self.local_matrix.setCell1(cellNo1) + + for cellNo2 in set(range(self.mesh.num_cells))-cellsUnion.toSet(): + self.local_matrix.setCell2(cellNo2) + mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + # multiply by 2 to account for the 2 symmetric contributions + self.local_matrix.eval(self.contrib, panel) + entry += extractElemElemSymMasked{SCALAR_label}(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + + if self.zeroExterior: + # (supp phi_I \cup supp phi_J) x Omega^C + surface = self.mesh.get_surface_mesh() + self.local_matrix_zeroExterior.setMesh2(surface) + + it1.setIndexSet(cellsInter) + while it1.step(): + cellNo1 = it1.i + self.local_matrix_zeroExterior.setCell1(cellNo1) + mask = getElemSymMask(dm, cellNo1, I, J) + for cellNo2 in range(surface.num_cells): + 
self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel) + entry += extractElemSymMasked{SCALAR_label}(dm, self.contribZeroExterior, 1., mask) + return entry + + cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=False, LinearOperator Anear=None, dict jumps={}, str prefix='', tree_node myRoot=None, BOOL_t doDistributedAssembly=False): + cdef: + INDEX_t cellNo1, cellNo2, cellNo3 + REAL_t fac + {SCALAR}_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior + meshBase surface + INDEX_t[:, ::1] cells = self.mesh.cells, surface_cells, fake_cells + indexSet cellsInter + indexSet clusterDofs1, clusterDofs2 + FilteredAssemblyOperator Anear_filtered = None + INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) + nearFieldClusterPair cluster + panelType panel + tupleDictMASK masks = None + ENCODE_t hv, hv2 + MASK_t mask + # INDEX_t vertex1, vertex2 + bint useSymmetricMatrix + bint useSymmetricCells + INDEX_t vertexNo, i + INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) + REAL_t evalShift = 1e-9 + local_matrix_t mass + indexSetIterator it = arrayIndexSetIterator() + INDEX_t startCluster + INDEX_t numAssembledCells + indexSet myDofs = None + REAL_t sValuePre, sValuePost + BOOL_t surfaceIntegralNeedsShift + + mask.reset() + + if myRoot is not None: + myDofs = myRoot.get_dofs() + + if Anear is None: + useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells and not forceUnsymmetric + with self.PLogger.Timer(prefix+'build near field sparsity pattern'): + # TODO: double check that this should not be + if myRoot is not None and doDistributedAssembly: + Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix, myRoot=myRoot) + else: + Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix) + LOGGER.info('Anear: {}'.format(Anear)) + + if 
self.comm is not None and self.comm.size > 1: + nnz = Anear.nnz + counts = np.zeros((self.comm.size), dtype=INDEX) + self.comm.Gather(np.array([nnz], dtype=INDEX), counts) + LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + + Anear_filtered = FilteredAssemblyOperator(Anear) + + useSymmetricCells = self.local_matrix.symmetricCells + + iM = {SCALAR_label}IndexManager(self.dm, Anear) + + use_masks = self.params.get('use_masks', True) + + with self.PLogger.Timer(prefix+'interior'): + # This corresponds to + # C(d,s) \int_D \int_D (u(x)-u(y)) (v(x)-v(y)) /|x-y|^{d+2s} + # where + # D = (supp u) \cup (supp v)., + # We only update unknowns that are in the cluster pair. + + if not use_masks: + # This loop does the correct thing, but we are wasting a lot of + # element x element evaluations. + for cluster in Pnear: + cellsUnion = cluster.cellsUnion + + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + Anear_filtered.setFilter(clusterDofs1, clusterDofs2) + iM = {SCALAR_label}IndexManager(self.dm, Anear_filtered) + + for cellNo1 in cellsUnion: + self.local_matrix.setCell1(cellNo1) + for cellNo2 in cellsUnion: + self.local_matrix.setCell2(cellNo2) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if useSymmetricCells and (cellNo1 != cellNo2): + fac = 2. + else: + fac = 1. + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + self.local_matrix.eval(contrib, panel) + if useSymmetricCells: + iM.addToMatrixElemElemSym(contrib, fac) + else: + iM.addToMatrixElemElem(contrib, fac) + else: + # Pre-record all element x element contributions. + # This way, we only assembly over each element x element pair once. + # We load balance the cells and only get the list for the local rank. 
+ startCluster = 0 + numAssembledCells = 0 + while startCluster < len(Pnear): + with self.PLogger.Timer(prefix+'interior - build masks'): + masks = iM.buildMasksForClusters(Pnear, useSymmetricCells, &startCluster) + + if (masks.getSizeInBytes() >> 20) > 20: + LOGGER.info('element x element pairs {}, {} MB'.format(masks.nnz, masks.getSizeInBytes() >> 20)) + # Compute all element x element contributions + with self.PLogger.Timer(prefix+'interior - compute'): + masks.startIter() + while masks.next(cellPair, &mask): + cellNo1 = cellPair[0] + cellNo2 = cellPair[1] + self.local_matrix.setCell1(cellNo1) + self.local_matrix.setCell2(cellNo2) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + numAssembledCells += 1 + if useSymmetricCells and (cellNo1 != cellNo2): + fac = 2. + else: + fac = 1. + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + self.local_matrix.eval(contrib, panel, mask) + if useSymmetricCells: + iM.addToMatrixElemElemSymMasked(contrib, fac, mask) + else: + iM.addToMatrixElemElemMasked(contrib, fac, mask) + masks = None + if self.comm is not None and self.comm.size > 1: + counts = np.zeros((self.comm.size), dtype=INDEX) + self.comm.Gather(np.array([numAssembledCells], dtype=INDEX), counts) + if self.comm.rank == 0: + LOGGER.info('Num assembled cells per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) + + if not self.kernel.variable: + if not self.kernel.complement: + with self.PLogger.Timer(prefix+'cluster zeroExterior'): + # This corresponds to + # C(d,s)/(2s) \int_D u(x) v(x) \int_E n.(x-y)/|x-y|^{d+2s} + # where + # D = (supp u) \cap (supp v) \subset E, + # E = \partial((supp u) \cup (supp v)). + # We only update unknowns that are in the cluster pair. 
+ + iM = {SCALAR_label}IndexManager(self.dm, Anear_filtered) + + for cluster in Pnear: + + cellsInter = cluster.cellsInter + if len(cellsInter) == 0: + continue + + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + + # surface of the union of clusters n1 and n2 + if self.mesh.dim == 1: + surface_cells = boundaryVertices(cells, cluster.cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(cells, cluster.cellsUnion) + else: + raise NotImplementedError() + + Anear_filtered.setFilter(clusterDofs1, clusterDofs2) + + self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo1 = it.i + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + for cellNo2 in range(surface_cells.shape[0]): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) + else: + raise NotImplementedError() + if not self.zeroExterior and not self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. 
+ surface = self.mesh.get_surface_mesh() + iM = {SCALAR_label}IndexManager(self.dm, Anear, myDofs=myDofs) + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -1.) + else: + raise NotImplementedError() + elif not self.zeroExterior and self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. + + if self.mesh.dim == 1: + vol = 2 + elif self.mesh.dim == 2: + vol = 2*np.pi * self.kernel.horizonValue + else: + raise NotImplementedError() + x = np.zeros((self.mesh.dim), dtype=REAL) + y = np.zeros((self.mesh.dim), dtype=REAL) + y[0] = self.kernel.horizonValue + coeff = constant(-vol*self.local_matrix_zeroExterior.kernel(x, y)) + qr = simplexXiaoGimbutas(2, self.mesh.dim) + if self.mesh.dim == 1: + mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) + elif self.mesh.dim == 2: + mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + + if myDofs is not None: + Anear_filtered2 = LeftFilteredAssemblyOperator(Anear) + Anear_filtered2.setFilter(myDofs) + assembleMatrix(self.mesh, self.dm, mass, A=Anear_filtered2) + else: + assembleMatrix(self.mesh, self.dm, mass, A=Anear) + + elif self.zeroExterior and not self.kernel.complement: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Add the zeroExterior contribution for Omega x Omega^C. 
+ surface = self.mesh.get_surface_mesh() + iM = {SCALAR_label}IndexManager(self.dm, Anear, myDofs=myDofs) + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) + iM.addToMatrixElemSym(contribZeroExterior, 1.) + + else: + surfaceIntegralNeedsShift = not isinstance(self.kernel.s, singleVariableUnsymmetricFractionalOrder) + + if not self.kernel.complement: + # This corresponds to + # \int_D \int_E u(x) v(x) C(d, s) / |x-y|^{d+2s} + # where + # D = (supp u) \cap (supp v) \subset E, + # E = Omega \ ((supp u) \cup (supp v)). + # We only update unknowns that are in the cluster pair. + with self.PLogger.Timer(prefix+'cluster exterior'): + iM = {SCALAR_label}IndexManager(self.dm, Anear_filtered) + + fake_cells = uninitialized((1, self.mesh.dim), dtype=INDEX) + for cluster in Pnear: + + cellsInter = cluster.cellsInter + if len(cellsInter) == 0: + continue + + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + + Anear_filtered.setFilter(clusterDofs1, clusterDofs2) + + if not self.kernel.complement: + + # surface of the union of clusters n1 and n2 + if self.mesh.dim == 1: + surface_cells = boundaryVertices(cells, cluster.cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(cells, cluster.cellsUnion) + else: + raise NotImplementedError() + self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, surface_cells) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo1 = it.i + self.local_matrix_surface.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + for cellNo2 in range(surface_cells.shape[0]): + 
self.local_matrix_surface.setCell2(cellNo2) + if surfaceIntegralNeedsShift: + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + self.local_matrix_surface.center2[0] += evalShift + else: + self.local_matrix_surface.center2[0] -= evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] -= evalShift*(self.local_matrix_surface.simplex2[1, 0]-self.local_matrix_surface.simplex2[0, 0]) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + self.local_matrix_surface.eval(contribZeroExterior, panel, mask) + # if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) + # else: + # print('here', np.array(contribZeroExterior)) + # iM.addToMatrixElem(contribZeroExterior, 1.) + # integrate all the jump interfaces + for hv in jumps: + decode_edge(hv, cellPair) + if not (cluster.cellsUnion.inSet(cellPair[0]) or + cluster.cellsUnion.inSet(cellPair[1])): + if self.mesh.dim == 1: + fake_cells[0, 0] = jumps[hv] + elif self.mesh.dim == 2: + hv2 = jumps[hv] + decode_edge(hv2, edge) + for vertexNo in range(self.mesh.dim): + fake_cells[0, vertexNo] = edge[vertexNo] + else: + raise NotImplementedError() + self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, fake_cells) + self.local_matrix_surface.setCell2(0) + + if surfaceIntegralNeedsShift: + if self.mesh.dim == 1: + self.local_matrix_surface.center2[0] += evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] += evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo3 = it.i + 
self.local_matrix_surface.setCell1(cellNo3) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + fac = 1. + else: + fac = -1. + else: + fac = 1. + self.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, fac) + else: + iM.addToMatrixElem(contribZeroExterior, fac) + sValuePre = self.local_matrix_surface.kernel.sValue + + if surfaceIntegralNeedsShift: + if self.mesh.dim == 1: + self.local_matrix_surface.center2[0] -= 2.*evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] -= 2.*evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] -= 2.*evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) + + it.reset() + while it.step(): + cellNo3 = it.i + self.local_matrix_surface.setCell1(cellNo3) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + fac = -1. + else: + fac = 1. + else: + fac = -1. + self.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + # if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, fac) + # else: + # iM.addToMatrixElem(contribZeroExterior, fac) + sValuePost = self.local_matrix_surface.kernel.sValue + if abs(sValuePre-sValuePost) < 1e-9: + print(np.array(self.local_matrix_surface.simplex2)) + assert False, "Jump of fractional order between elements is zero (Value = {}). 
Check that the mesh aligns with the jump in the fractional order.".format(sValuePre) + if not self.zeroExterior and not self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. + surface = self.mesh.get_surface_mesh() + iM = {SCALAR_label}IndexManager(self.dm, Anear, myDofs=myDofs) + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + mask = iM.getElemSymMask() + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + if self.mesh.dim == 1: + if self.local_matrix_zeroExterior.center1[0] < self.local_matrix_zeroExterior.center2[0]: + self.local_matrix_zeroExterior.center2[0] += evalShift + else: + self.local_matrix_zeroExterior.center2[0] -= evalShift + elif self.mesh.dim == 2: + self.local_matrix_zeroExterior.center2[0] += evalShift*(self.local_matrix_zeroExterior.simplex2[1, 1]-self.local_matrix_zeroExterior.simplex2[0, 1]) + self.local_matrix_zeroExterior.center2[1] -= evalShift*(self.local_matrix_zeroExterior.simplex2[1, 0]-self.local_matrix_zeroExterior.simplex2[0, 0]) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel, mask) + # if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -1.) + # else: + # iM.addToMatrixElem(contribZeroExterior, -1.) 
+ elif not self.zeroExterior and self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the contribution for Omega x (\partial B_\delta(x)) + assert isinstance(self.kernel.horizon, constant) + self.local_matrix_zeroExterior.center2 = uninitialized((self.mesh.dim), dtype=REAL) + coeff = horizonSurfaceIntegral(self.local_matrix_zeroExterior.kernel, self.kernel.horizon.value) + qr = simplexXiaoGimbutas(2, self.mesh.dim) + if self.mesh.dim == 1: + mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) + elif self.mesh.dim == 2: + mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + assembleMatrix(self.mesh, self.dm, mass, A=Anear) + + return Anear + + def reduceNearOp(self, LinearOperator Anear, indexSet myDofs): + cdef: + INDEX_t k = -1, kk, jj + INDEX_t[::1] A_indptr = Anear.indptr, A_indices = Anear.indices + REAL_t[::1] A_data = Anear.data, A_diagonal = None + INDEX_t[::1] indptr, indices + REAL_t[::1] data, diagonal = None + LinearOperator Aother + INDEX_t I, nnz + indexSetIterator it = myDofs.getIter() + counts = np.zeros((self.comm.size), dtype=INDEX) + self.comm.Gather(np.array([Anear.nnz], dtype=INDEX), counts) + if self.comm.rank == 0: + LOGGER.info('Near field entries per rank: {} ({}) / {} / {} ({}) imbalance: {}'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax(), counts.max()/counts.min())) + # drop entries that are not in rows of myRoot.dofs + Anear = self.dropOffRank(Anear, myDofs) + + A_indptr = Anear.indptr + + # sum distribute matrices by stacking rows + indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) + for k in range(self.dm.num_dofs): + indptr[k+1] = A_indptr[k+1]-A_indptr[k] + if self.comm.rank == 0: + self.comm.Reduce(MPI.IN_PLACE, indptr, root=0) + else: + self.comm.Reduce(indptr, indptr, root=0) + + if self.comm.rank == 0: + for k in range(self.dm.num_dofs): + indptr[k+1] += indptr[k] + nnz = indptr[self.dm.num_dofs] + + 
indices = uninitialized((nnz), dtype=INDEX) + data = uninitialized((nnz), dtype=REAL) + if isinstance(Anear, SSS_LinearOperator): + diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) + + for p in range(self.comm.size): + if p == 0: + Aother = Anear + else: + Aother = self.comm.recv(source=p) + + A_indptr = Aother.indptr + A_indices = Aother.indices + A_data = Aother.data + + for I in range(self.dm.num_dofs): + kk = indptr[I] + for jj in range(A_indptr[I], A_indptr[I+1]): + indices[kk] = A_indices[jj] + data[kk] = A_data[jj] + kk += 1 + + if isinstance(Aother, SSS_LinearOperator): + A_diagonal = Aother.diagonal + for I in range(self.dm.num_dofs): + diagonal[I] += A_diagonal[I] + + if isinstance(Anear, SSS_LinearOperator): + Anear = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + Anear = CSR_LinearOperator(indices, indptr, data) + else: + self.comm.send(Anear, dest=0) + self.comm.Barrier() + + if self.comm.rank != 0: + Anear = None + else: + LOGGER.info('Anear reduced: {}'.format(Anear)) + # Anear = self.comm.bcast(Anear, root=0) + return Anear + + def dropOffRank(self, LinearOperator Anear, indexSet myDofs): + cdef: + INDEX_t k = -1, kk, jj + INDEX_t[::1] A_indptr = Anear.indptr, A_indices = Anear.indices + REAL_t[::1] A_data = Anear.data, A_diagonal = None + INDEX_t[::1] indptr, indices + REAL_t[::1] data, diagonal = None + indexSetIterator it = myDofs.getIter() + # drop entries that are not in rows of myRoot.dofs + indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) + while it.step(): + k = it.i + indptr[k+1] = A_indptr[k+1]-A_indptr[k] + for k in range(self.dm.num_dofs): + indptr[k+1] += indptr[k] + indices = uninitialized((indptr[self.dm.num_dofs]), dtype=INDEX) + data = uninitialized((indptr[self.dm.num_dofs]), dtype=REAL) + it.reset() + while it.step(): + k = it.i + kk = indptr[k] + for jj in range(A_indptr[k], A_indptr[k+1]): + indices[kk] = A_indices[jj] + data[kk] = A_data[jj] + kk += 1 + if isinstance(Anear, SSS_LinearOperator): + 
A_diagonal = Anear.diagonal + diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) + it.reset() + while it.step(): + k = it.i + diagonal[k] = A_diagonal[k] + Anear = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + Anear = CSR_LinearOperator(indices, indptr, data) + return Anear + + def getDiagonal(self): + cdef: + {SCALAR_label}diagonalOperator D + INDEX_t I + INDEX_t start, end + D = {SCALAR_label}diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) + if self.comm: + start = np.ceil(self.dm.num_dofs*self.comm.rank/self.comm.size) + end = np.ceil(self.dm.num_dofs*(self.comm.rank+1)/self.comm.size) + else: + start = 0 + end = self.dm.num_dofs + if self.kernel.variable: + for I in range(start, end): + D.setEntry(I, I, self.getEntryCluster(I, I)) + else: + for I in range(start, end): + D.setEntry(I, I, self.getEntry(I, I)) + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, D.data) + return D + + def getDiagonalCluster(self): + cdef: + {SCALAR_label}diagonalOperator D + tree_node n + nearFieldClusterPair c + INDEX_t I + list clusters = [] + REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) + list d2c = self.d2c + D = {SCALAR_label}diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) + for I in range(self.dm.num_dofs): + n = tree_node(None, set([I]), fake_boxes) + n._cells = d2c[I] + c = nearFieldClusterPair(n, n) + c.set_cells() + clusters.append(c) + D = self.assembleClusters(clusters, Anear=D) + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, D.data) + return D + + def getKernelBlocksAndJumps(self): + cdef: + meshBase mesh = self.mesh + DoFMap DoFMap = self.dm + fractionalOrderBase s = self.kernel.s + REAL_t[::1] orders = None + REAL_t[::1] dofOrders + REAL_t cellOrder + dict blocks + INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) + INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) + INDEX_t cellNo, dofNo, dof, cellNo1, cellNo2, vertexNo1, vertexNo2, vertex1, vertex2, i + ENCODE_t hv + REAL_t UNASSIGNED = 
-np.inf + if isinstance(s, piecewiseConstantFractionalOrder): + orders = P0_DoFMap(mesh).interpolate(s.blockIndicator) + else: + orders = P0_DoFMap(mesh).interpolate(s.diagonal()) + dofOrders = np.full((DoFMap.num_dofs), fill_value=UNASSIGNED, dtype=REAL) + for cellNo in range(mesh.num_cells): + cellOrder = orders[cellNo] + for dofNo in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof(cellNo, dofNo) + if dof >= 0: + if dofOrders[dof] == UNASSIGNED: + dofOrders[dof] = cellOrder + elif dofOrders[dof] != INTERFACE_DOF: + if dofOrders[dof] != cellOrder: + dofOrders[dof] = INTERFACE_DOF + # blocks is a dict + # value fractional order -> set of dofs + # dofs at interfaces between different fractional orders are in blocks[INTERFACE_DOF] + blocks = {} + for dof in range(DoFMap.num_dofs): + try: + blocks[dofOrders[dof]].add(dof) + except KeyError: + blocks[dofOrders[dof]] = set([dof]) + LOGGER.debug('Block sizes: '+str({key: len(blocks[key]) for key in blocks})) + + # jumps is a dict of element interfaces where the kernel has a jump. 
+ # in 1D: + # encoded cell pair -> vertex at the interface between cells + # in 2D: + # encoded cell pair -> encoded edge + jumps = {} + cellConnectivity = mesh.getCellConnectivity(mesh.dim) + for cellNo1 in range(mesh.num_cells): + for cellNo2 in cellConnectivity[cellNo1]: + if orders[cellNo1] != orders[cellNo2]: + sortEdge(cellNo1, cellNo2, cellPair) + hv = encode_edge(cellPair) + if mesh.dim == 1: + for vertexNo1 in range(mesh.dim+1): + vertex1 = mesh.cells[cellNo1, vertexNo1] + for vertexNo2 in range(mesh.dim+1): + vertex2 = mesh.cells[cellNo2, vertexNo2] + if vertex1 == vertex2: + jumps[hv] = vertex1 + break + else: + i = 0 + for vertexNo1 in range(mesh.dim+1): + vertex1 = mesh.cells[cellNo1, vertexNo1] + for vertexNo2 in range(mesh.dim+1): + vertex2 = mesh.cells[cellNo2, vertexNo2] + if vertex1 == vertex2: + edge[i] = vertex1 + i += 1 + break + hv2 = encode_edge(edge) + jumps[hv] = hv2 + return blocks, jumps + + def getTree(self, + BOOL_t doDistributedAssembly, + refinementParams refParams, + REAL_t[:, :, ::1] boxes, + sparseGraph cells, + REAL_t[:, ::1] coords, + BOOL_t allNearField=False, + DoFMap dm=None): + cdef: + INDEX_t num_cluster_dofs + dict blocks = {}, jumps = {} + indexSet dofs, clusterDofs, subDofs, blockDofs + indexSetIterator it + REAL_t key + tree_node root, myRoot, n + + if dm is None: + dm = self.dm + + with self.PLogger.Timer('prepare tree'): + dofs = arrayIndexSet(np.arange(dm.num_dofs, dtype=INDEX), sorted=True) + root = tree_node(None, dofs, boxes, mixed_node=allNearField) + + if doDistributedAssembly: + from PyNucleus_fem.meshPartitioning import PartitionerException + + try: + root.partition(dm, self.comm, boxes, canBeAssembled=not self.kernel.variable, mixed_node=allNearField, params=self.params) + except PartitionerException: + doDistributedAssembly = False + LOGGER.warning('Falling back to serial assembly') + # check again, in case partitioning failed + if doDistributedAssembly: + myRoot = root.children[self.comm.rank] + else: + 
myRoot = root + + if self.kernel.variable and not (self.kernel.variableOrder and isinstance(self.kernel.s, singleVariableUnsymmetricFractionalOrder)): + blocks, jumps = self.getKernelBlocksAndJumps() + if len(jumps) > 0: + my_id = root.get_max_id()+1 + for n in root.leaves(): + clusterDofs = n.get_dofs() + num_cluster_dofs = clusterDofs.getNumEntries() + num_dofs = 0 + children = [] + for key in sorted(blocks): + blockDofs = arrayIndexSet() + blockDofs.fromSet(blocks[key]) + subDofs = blockDofs.inter(clusterDofs) + if subDofs.getNumEntries() > 0: + num_dofs += subDofs.getNumEntries() + children.append(tree_node(n, subDofs, boxes, mixed_node=key == INTERFACE_DOF)) + children[len(children)-1].id = my_id + my_id += 1 + assert num_dofs == num_cluster_dofs, (num_dofs, num_cluster_dofs) + n.children = children + n._dofs = None + # node ids are otherwise incorrect + # assert not doDistributedAssembly, "Cannot assemble variable kernel in distributed mode" + else: + for n in root.leaves(): + n.canBeAssembled = True + LOGGER.info('Jumps: {}, Block sizes: {}, Leaf nodes: {}'.format(len(jumps), str({key: len(blocks[key]) for key in blocks}), len(list(root.leaves())))) + + if doDistributedAssembly: + if self.kernel.variable: + root.irregularLevelsOffset = root.numLevels-1 + else: + root.irregularLevelsOffset = 1 + else: + root.irregularLevelsOffset = 1 + + if refParams.maxLevels <= 0: + refParams.maxLevels = root.numLevels+refParams.maxLevels + + return root, myRoot, jumps, doDistributedAssembly + + def getAdmissibleClusters(self, + tree_node root, tree_node myRoot, + BOOL_t doDistributedAssembly, + refinementParams refParams, + REAL_t[:, :, ::1] boxes, + sparseGraph cells, + REAL_t[:, ::1] coords, + BOOL_t assembleOnRoot=True, + BOOL_t ignoreDiagonalBlocks=False): + cdef: + dict Pfar = {} + list Pnear = [] + INDEX_t lvl, id1, id2 + nearFieldClusterPair cPnear + farFieldClusterPair cP + tree_node n1, n + dict added + INDEX_t N + dict node_lookup + INDEX_t dof, k + set myCells + 
with self.PLogger.Timer('admissible clusters'): + if doDistributedAssembly: + if assembleOnRoot: + # we need all tree nodes to be already available when we gather the far field clusters + for n in root.leaves(): + n.refine(boxes, coords, refParams, recursive=True) + + for n in root.children: + if ignoreDiagonalBlocks and (n.id == myRoot.id): + pass + getAdmissibleClusters(self.local_matrix.kernel, myRoot, n, + refParams, + Pfar=Pfar, Pnear=Pnear, + boxes1=boxes, + coords1=coords, + boxes2=boxes, + coords2=coords) + + symmetrizeNearFieldClusters(Pnear) + + counts = np.zeros((self.comm.size), dtype=INDEX) + self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) + LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + + self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) + LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + + if assembleOnRoot: + # collect far field on rank 0 + farField = [] + for lvl in Pfar: + for cP in Pfar[lvl]: + # "lvl+1", since the ranks are children of the global root + farField.append((lvl+1, cP.n1.id, cP.n2.id)) + farField = np.array(farField, dtype=INDEX) + self.comm.Gather(np.array([farField.shape[0]], dtype=INDEX), counts) + if self.comm.rank == 0: + LOGGER.info('Far field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + N = 0 + for rank in range(self.comm.size): + N += counts[rank] + farFieldCollected = uninitialized((N, 3), dtype=INDEX) + counts *= 3 + else: + farFieldCollected = None + self.comm.Gatherv(farField, [farFieldCollected, (counts, None)], root=0) + del farField + + if self.comm.rank == 0: + Pfar = {} + added = {} + node_lookup = {} + for n1 in root.get_tree_nodes(): + node_lookup[n1.id] = n1 + for k in 
range(farFieldCollected.shape[0]): + lvl, id1, id2 = farFieldCollected[k, :] + cP = farFieldClusterPair(node_lookup[id1], + node_lookup[id2]) + try: + if (id1, id2) not in added[lvl]: + Pfar[lvl].append(cP) + added[lvl].add((id1, id2)) + except KeyError: + Pfar[lvl] = [cP] + added[lvl] = set([(id1, id2)]) + del farFieldCollected + else: + Pfar = {} + else: + getAdmissibleClusters(self.local_matrix.kernel, root, root, + refParams, + Pfar=Pfar, Pnear=Pnear, + boxes1=boxes, + coords1=coords, + boxes2=boxes, + coords2=coords) + + if self.params.get('trim', True): + trimTree(root, Pnear, Pfar, self.comm) + + # Enter cells in leaf nodes + it = arrayIndexSetIterator() + for n in root.leaves(): + myCells = set() + it.setIndexSet(n.dofs) + while it.step(): + dof = it.i + for k in range(cells.indptr[dof], + cells.indptr[dof+1]): + myCells.add(cells.indices[k]) + n._cells = arrayIndexSet() + n._cells.fromSet(myCells) + del cells + + # set the cells of the near field cluster pairs + for cPnear in Pnear: + cPnear.set_cells() + return Pnear, Pfar + + def getCoveringClusters(self, + tree_node root, tree_node myRoot, + BOOL_t doDistributedAssembly, + refinementParams refParams, + REAL_t[:, :, ::1] boxes, + sparseGraph cells, + REAL_t[:, ::1] coords, + BOOL_t assembleOnRoot=True, + BOOL_t ignoreDiagonalBlocks=False): + cdef: + list Pnear = [] + nearFieldClusterPair cPnear + tree_node n + INDEX_t dof, k + set myCells + with self.PLogger.Timer('covering clusters'): + if doDistributedAssembly: + if assembleOnRoot: + # we need all tree nodes to be already available when we gather the far field clusters + for n in root.leaves(): + n.refine(boxes, coords, refParams, recursive=True) + + for n in root.children: + if ignoreDiagonalBlocks and (n.id == myRoot.id): + pass + getCoveringClusters(self.local_matrix.kernel, myRoot, n, + refParams, + Pnear, + boxes1=boxes, + coords1=coords, + boxes2=boxes, + coords2=coords) + + symmetrizeNearFieldClusters(Pnear) + + counts = 
np.zeros((self.comm.size), dtype=INDEX) + self.comm.Gather(np.array([myRoot.num_dofs], dtype=INDEX), counts) + LOGGER.info('Unknowns per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + + self.comm.Gather(np.array([len(Pnear)], dtype=INDEX), counts) + LOGGER.info('Near field cluster pairs per rank: {} ({}) / {} / {} ({})'.format(counts.min(), counts.argmin(), counts.mean(), counts.max(), counts.argmax())) + + else: + getCoveringClusters(self.kernel, root, root, + refParams, + Pnear, + boxes1=boxes, + coords1=coords, + boxes2=boxes, + coords2=coords) + + if self.params.get('trim', True): + trimTree(root, Pnear, {}, self.comm) + + # Enter cells in leaf nodes + it = arrayIndexSetIterator() + for n in root.leaves(): + myCells = set() + it.setIndexSet(n.dofs) + while it.step(): + dof = it.i + for k in range(cells.indptr[dof], + cells.indptr[dof+1]): + myCells.add(cells.indices[k]) + n._cells = arrayIndexSet() + n._cells.fromSet(myCells) + del cells + + # set the cells of the near field cluster pairs + for cPnear in Pnear: + cPnear.set_cells() + + return Pnear + + def getH2RefinementParams(self): + cdef: + meshBase mesh = self.mesh + refinementParams refParams + REAL_t singularity = self.kernel.max_singularity + + target_order = self.local_matrix.target_order + refParams.eta = self.params.get('eta', 3.) 
+ + iO = self.params.get('interpolation_order', None) + if iO is None: + loggamma = abs(np.log(0.25)) + refParams.interpolation_order = max(np.ceil((2*target_order+max(-singularity, 2))*abs(np.log(mesh.hmin/mesh.diam))/loggamma/3.), 2) + else: + refParams.interpolation_order = iO + mL = self.params.get('maxLevels', None) + if mL is None: + # maxLevels = max(int(np.around(np.log2(DoFMap.num_dofs)/mesh.dim-np.log2(refParams.interpolation_order))), 0) + refParams.maxLevels = 200 + else: + refParams.maxLevels = mL + refParams.maxLevelsMixed = refParams.maxLevels + mCS = self.params.get('minClusterSize', None) + if mCS is None: + refParams.minSize = refParams.interpolation_order**mesh.dim//2 + else: + refParams.minSize = mCS + if self.kernel.finiteHorizon: + refParams.minMixedSize = max(min(self.kernel.horizon.value//(2*mesh.h)-1, refParams.minSize), 1) + else: + refParams.minMixedSize = refParams.minSize + mFFBS = self.params.get('minFarFieldBlockSize', None) + if mFFBS is None: + # For this value, size(kernelInterpolant) == size(dense block) + # If we choose a smaller value for minFarFieldBlockSize, then we use more memory, + # but we might save time, since the assembly of a far field block is cheaper than a near field block. 
+ refParams.farFieldInteractionSize = refParams.interpolation_order**(2*mesh.dim) + else: + refParams.farFieldInteractionSize = mFFBS + + rT = self.params.get('refinementType', 'MEDIAN') + refParams.refType = {'geometric': GEOMETRIC, + 'GEOMETRIC': GEOMETRIC, + 'median': MEDIAN, + 'MEDIAN': MEDIAN, + 'barycenter': BARYCENTER, + 'BARYCENTER': BARYCENTER}[rT] + + refParams.splitEveryDim = self.params.get('splitEveryDim', False) + + refParams.attemptRefinement = True + + return refParams + + def doLocalFarFieldIndexing(self, tree_node myRoot, REAL_t[:, :, ::1] boxes): + cdef: + meshBase mesh = self.mesh + REAL_t[:, :, ::1] local_boxes = None + INDEX_t local_dof, global_dof, k, new_dof, i, j + dict lookup + CSR_LinearOperator lclR = None, lclP = None + INDEX_t[::1] newDoFsArray + unsortedArrayIndexSet newDoFs + indexSetIterator it + DoFMap local_dm = None + tree_node n + arrayIndexSet oldDoFs + with self.PLogger.Timer('localFarFieldIndexing'): + lclDoFs = myRoot.dofs.toArray() + lclIndicator = self.dm.zeros() + lclIndicator.toarray()[lclDoFs] = 1. 
+ split = dofmapSplitter(self.dm, {'lcl': lclIndicator}) + local_dm = split.getSubMap('lcl') + local_dm.inner = ip_distributed_nonoverlapping(self.comm) + local_dm.norm = norm_distributed_nonoverlapping(self.comm) + lclR, lclP = split.getRestrictionProlongation('lcl') + lookup = {} + for local_dof in range(local_dm.num_dofs): + global_dof = lclR.indices[local_dof] + lookup[global_dof] = local_dof + for n in myRoot.leaves(): + oldDoFs = n._dofs + newDoFsArray = uninitialized((oldDoFs.getNumEntries()), dtype=INDEX) + k = 0 + it = oldDoFs.getIter() + while it.step(): + dof = it.i + new_dof = lookup[dof] + newDoFsArray[k] = new_dof + k += 1 + newDoFs = unsortedArrayIndexSet(newDoFsArray) + n._local_dofs = newDoFs + local_boxes = uninitialized((local_dm.num_dofs, mesh.dim, 2), dtype=REAL) + for local_dof in range(local_dm.num_dofs): + global_dof = lclR.indices[local_dof] + for i in range(mesh.dim): + for j in range(2): + local_boxes[local_dof, i, j] = boxes[global_dof, i, j] + return local_boxes, local_dm, lclR, lclP + + def getH2(self, BOOL_t returnNearField=False, returnTree=False, tree_node root=None, tree_node myRoot=None, dict jumps={}, BOOL_t ignoreDiagonalBlocks=False): + cdef: + meshBase mesh = self.mesh + DoFMap DoFMap = self.dm + REAL_t[:, :, ::1] boxes = None, local_boxes + sparseGraph cells = None + REAL_t[:, ::1] coords = None + dict Pfar + list Pnear + LinearOperator h2 = None, Anear = None + BOOL_t forceUnsymmetric, doDistributedAssembly = False, assembleOnRoot = True, localFarFieldIndexing = False + refinementParams refParams + CSR_LinearOperator lclR + + refParams = self.getH2RefinementParams() + + forceUnsymmetric = self.params.get('forceUnsymmetric', False) + doDistributedAssembly = self.comm is not None and self.comm.size > 1 and DoFMap.num_dofs > self.comm.size + assembleOnRoot = self.params.get('assembleOnRoot', True) + localFarFieldIndexing = self.params.get('localFarFieldIndexing', False) + localFarFieldIndexing = doDistributedAssembly and not 
assembleOnRoot and localFarFieldIndexing + if doDistributedAssembly and not assembleOnRoot: + assert forceUnsymmetric + + with self.PLogger.Timer('boxes, cells, coords'): + boxes, cells = getDoFBoxesAndCells(self.dm.mesh, self.dm, self.comm) + coords = self.dm.getDoFCoordinates() + + # construct the cluster tree + if root is None: + root, myRoot, jumps, doDistributedAssembly = self.getTree(doDistributedAssembly, refParams, boxes, cells, coords) + + # get the admissible cluster pairs + Pnear, Pfar = self.getAdmissibleClusters(root, myRoot, doDistributedAssembly, refParams, boxes, cells, coords, assembleOnRoot=assembleOnRoot, ignoreDiagonalBlocks=ignoreDiagonalBlocks) + lenPfar = len(Pfar) + if doDistributedAssembly: + lenPfar = self.comm.bcast(lenPfar) + + if lenPfar > 0: + LOGGER.info('interpolation_order: {}, maxLevels: {}, minClusterSize: {}, minMixedClusterSize: {}, minFarFieldBlockSize: {}, eta: {}'.format(refParams.interpolation_order, + refParams.maxLevels, + refParams.minSize, + refParams.minMixedSize, + refParams.farFieldInteractionSize, + refParams.eta)) + + # get near field matrix + with self.PLogger.Timer('near field'): + Anear = self.assembleClusters(Pnear, jumps=jumps, forceUnsymmetric=forceUnsymmetric, myRoot=myRoot, doDistributedAssembly=doDistributedAssembly) + if doDistributedAssembly and assembleOnRoot: + with self.PLogger.Timer('reduceNearOp'): + Anear = self.reduceNearOp(Anear, myRoot.get_dofs()) + + if localFarFieldIndexing: + local_boxes, local_dm, lclR, lclP = self.doLocalFarFieldIndexing(myRoot, boxes) + + with self.PLogger.Timer('leaf values'): + # get leave values + if self.kernel.max_singularity > -self.kernel.dim-2: + if not localFarFieldIndexing: + root.enterLeafValues(mesh, DoFMap, refParams.interpolation_order, boxes, self.comm, assembleOnRoot=assembleOnRoot) + else: + myRoot.enterLeafValues(mesh, local_dm, refParams.interpolation_order, local_boxes, local=True) + elif (self.kernel.min_singularity < -self.kernel.dim-2) and 
(self.kernel.max_singularity > -self.kernel.dim-4): + if not localFarFieldIndexing: + root.enterLeafValuesGrad(mesh, DoFMap, refParams.interpolation_order, boxes, self.comm) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + if self.comm is None or (assembleOnRoot and self.comm.rank == 0) or (not assembleOnRoot): + with self.PLogger.Timer('far field'): + # get kernel interpolations + bemMode = False + assembleFarFieldInteractions(self.local_matrix.kernel, Pfar, refParams.interpolation_order, DoFMap, bemMode) + + with self.PLogger.Timer('transfer matrices'): + # get transfer matrices + root.prepareTransferOperators(refParams.interpolation_order) + + if self.comm is None or (assembleOnRoot and self.comm.rank == 0): + h2 = H2Matrix(root, Pfar, Anear) + else: + with self.PLogger.Timer('setup distributed op'): + local_h2 = H2Matrix(root, Pfar, Anear) + if not localFarFieldIndexing: + h2 = DistributedH2Matrix_globalData(local_h2, self.comm) + else: + h2 = DistributedH2Matrix_localData(local_h2, Pnear, self.comm, self.dm, local_dm, lclR, lclP) + else: + h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) + LOGGER.info('{}'.format(h2)) + elif len(Pnear) == 0: + h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) + else: + LOGGER.info('Cannot assemble H2 operator, assembling dense matrix instead') + with self.PLogger.Timer('dense operator'): + h2 = self.getDense() + if returnNearField: + if returnTree: + return h2, Pnear, root + else: + return h2, Pnear + else: + if returnTree: + return h2, root + else: + return h2 + + def getH2FiniteHorizon(self, LinearOperator Ainf=None): + A = horizonCorrected(self.mesh, self.dm, self.kernel, self.comm, Ainf, logging=isinstance(self.PLogger, (PLogger, LoggingPLogger))) + return A diff --git a/nl/PyNucleus_nl/nonlocalProblems.py b/nl/PyNucleus_nl/nonlocalProblems.py index 8205cd6d..90af6f91 100644 --- a/nl/PyNucleus_nl/nonlocalProblems.py +++ b/nl/PyNucleus_nl/nonlocalProblems.py @@ -56,6 +56,7 @@ 
feFractionalOrder) from . kernelsCy import (getKernelEnum, FRACTIONAL, INDICATOR, PERIDYNAMIC, GAUSSIAN, + LOGINVERSEDISTANCE, MONOMIAL, ) from . kernels import (getFractionalKernel, getIntegrableKernel, @@ -108,6 +109,8 @@ def build(self, name, *args, **kwargs): kernelFactory.register('indicator', getIntegrableKernel, params={'kernel': INDICATOR}, aliases=['constant']) kernelFactory.register('inverseDistance', getIntegrableKernel, params={'kernel': PERIDYNAMIC}, aliases=['peridynamic', 'inverseOfDistance']) kernelFactory.register('gaussian', getIntegrableKernel, params={'kernel': GAUSSIAN}) +kernelFactory.register('logInverseDistance', getIntegrableKernel, params={'kernel': LOGINVERSEDISTANCE}) +kernelFactory.register('monomial', getIntegrableKernel, params={'kernel': MONOMIAL}) class nonlocalMeshFactoryClass(factory): @@ -1503,3 +1506,5 @@ def getIdentifier(self, params): except KeyError: d.append((k, str(params[k]))) return '-'.join(['brusselator'] + [key + '=' + v for key, v in d]) + + diff --git a/nl/PyNucleus_nl/twoPointFunctions.pxd b/nl/PyNucleus_nl/twoPointFunctions.pxd index 45ce00fa..cb44bb44 100644 --- a/nl/PyNucleus_nl/twoPointFunctions.pxd +++ b/nl/PyNucleus_nl/twoPointFunctions.pxd @@ -6,25 +6,12 @@ ################################################################################### cimport numpy as np -from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t -from PyNucleus_fem.functions cimport function +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from PyNucleus_fem.functions cimport function, complexFunction -cdef class twoPointFunction: - cdef: - public BOOL_t symmetric - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) - - -cdef class productTwoPoint(twoPointFunction): - cdef: - public twoPointFunction f1, f2 - - -cdef class constantTwoPoint(twoPointFunction): - cdef: - public REAL_t value +include "twoPointFunctions_decl_REAL.pxi" +include 
"twoPointFunctions_decl_COMPLEX.pxi" cdef class leftRightTwoPoint(twoPointFunction): @@ -60,18 +47,6 @@ cdef class unsymTwoPoint(twoPointFunction): public REAL_t l, r -cdef class parametrizedTwoPointFunction(twoPointFunction): - cdef: - void *params - cdef void setParams(self, void *params) - cdef void* getParams(self) - - -cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction): - cdef: - public twoPointFunction f1, f2 - - cdef class inverseTwoPoint(twoPointFunction): cdef: twoPointFunction f diff --git a/nl/PyNucleus_nl/twoPointFunctions.pyx b/nl/PyNucleus_nl/twoPointFunctions.pyx index dab65eca..c1ccf280 100644 --- a/nl/PyNucleus_nl/twoPointFunctions.pyx +++ b/nl/PyNucleus_nl/twoPointFunctions.pyx @@ -12,103 +12,105 @@ Defines the base class for functions of two spatial variables, e.g. kernels, fra import numpy as np cimport numpy as np from libc.math cimport sqrt, exp, atan -from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE, BOOL cdef enum fixed_type: FIXED_X FIXED_Y DIAGONAL - -cdef class fixedTwoPointFunction(function): - cdef: - twoPointFunction f - REAL_t[::1] point - fixed_type fixedType - - def __init__(self, twoPointFunction f, REAL_t[::1] point, fixed_type fixedType): - self.f = f - self.point = point - self.fixedType = fixedType - - cdef REAL_t eval(self, REAL_t[::1] x): - if self.fixedType == FIXED_X: - return self.f(self.point, x) - if self.fixedType == FIXED_Y: - return self.f(x, self.point) - else: - return self.f(x, x) - - -cdef class twoPointFunction: - def __init__(self, BOOL_t symmetric): - self.symmetric = symmetric - - def __call__(self, REAL_t[::1] x, REAL_t[::1] y): - return self.eval(x, y) - - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - raise NotImplementedError() - - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - raise NotImplementedError() - - def __getstate__(self): - return self.symmetric - - def __setstate__(self, 
state): - twoPointFunction.__init__(self, state) - - def fixedX(self, REAL_t[::1] x): - return fixedTwoPointFunction(self, x, FIXED_X) - - def fixedY(self, REAL_t[::1] y): - return fixedTwoPointFunction(self, y, FIXED_Y) - - def diagonal(self): - return fixedTwoPointFunction(self, None, DIAGONAL) - - def plot(self, mesh, **kwargs): - cdef: - INDEX_t i, j - REAL_t[:, ::1] S - REAL_t[::1] S2 - REAL_t[::1] x, y - import matplotlib.pyplot as plt - c = np.array(mesh.getCellCenters()) - if mesh.dim == 1: - X, Y = np.meshgrid(c[:, 0], c[:, 0]) - x = np.empty((mesh.dim), dtype=REAL) - y = np.empty((mesh.dim), dtype=REAL) - S = np.zeros((mesh.num_cells, mesh.num_cells)) - for i in range(mesh.num_cells): - for j in range(mesh.num_cells): - x[0] = X[i, j] - y[0] = Y[i, j] - S[i, j] = self.eval(x, y) - plt.pcolormesh(X, Y, S, **kwargs) - plt.colorbar() - plt.xlabel(r'$x$') - plt.ylabel(r'$y$') - elif mesh.dim == 2: - S2 = np.zeros(mesh.num_cells) - for i in range(mesh.num_cells): - S2[i] = self(c[i, :], c[i, :]) - mesh.plotFunction(S2, flat=True) - else: - raise NotImplementedError() - - def __mul__(self, twoPointFunction other): - if isinstance(self, constantTwoPoint) and isinstance(other, constantTwoPoint): - return constantTwoPoint(self.value*other.value) - elif isinstance(self, parametrizedTwoPointFunction) or isinstance(other, parametrizedTwoPointFunction): - return productParametrizedTwoPoint(self, other) - elif isinstance(self, constantTwoPoint) and isinstance(other, (float, REAL)): - return constantTwoPoint(self.value*other) - elif isinstance(other, constantTwoPoint) and isinstance(self, (float, REAL)): - return constantTwoPoint(self*other.value) - else: - return productTwoPoint(self, other) +include "twoPointFunctions_REAL.pxi" +include "twoPointFunctions_COMPLEX.pxi" + +# cdef class fixedTwoPointFunction(function): +# cdef: +# twoPointFunction f +# REAL_t[::1] point +# fixed_type fixedType + +# def __init__(self, twoPointFunction f, REAL_t[::1] point, fixed_type 
fixedType): +# self.f = f +# self.point = point +# self.fixedType = fixedType + +# cdef REAL_t eval(self, REAL_t[::1] x): +# if self.fixedType == FIXED_X: +# return self.f(self.point, x) +# if self.fixedType == FIXED_Y: +# return self.f(x, self.point) +# else: +# return self.f(x, x) + + +# cdef class twoPointFunction: +# def __init__(self, BOOL_t symmetric): +# self.symmetric = symmetric + +# def __call__(self, REAL_t[::1] x, REAL_t[::1] y): +# return self.eval(x, y) + +# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): +# raise NotImplementedError() + +# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): +# raise NotImplementedError() + +# def __getstate__(self): +# return self.symmetric + +# def __setstate__(self, state): +# twoPointFunction.__init__(self, state) + +# def fixedX(self, REAL_t[::1] x): +# return fixedTwoPointFunction(self, x, FIXED_X) + +# def fixedY(self, REAL_t[::1] y): +# return fixedTwoPointFunction(self, y, FIXED_Y) + +# def diagonal(self): +# return fixedTwoPointFunction(self, None, DIAGONAL) + +# def plot(self, mesh, **kwargs): +# cdef: +# INDEX_t i, j +# REAL_t[:, ::1] S +# REAL_t[::1] S2 +# REAL_t[::1] x, y +# import matplotlib.pyplot as plt +# c = np.array(mesh.getCellCenters()) +# if mesh.dim == 1: +# X, Y = np.meshgrid(c[:, 0], c[:, 0]) +# x = np.empty((mesh.dim), dtype=REAL) +# y = np.empty((mesh.dim), dtype=REAL) +# S = np.zeros((mesh.num_cells, mesh.num_cells)) +# for i in range(mesh.num_cells): +# for j in range(mesh.num_cells): +# x[0] = X[i, j] +# y[0] = Y[i, j] +# S[i, j] = self.eval(x, y) +# plt.pcolormesh(X, Y, S, **kwargs) +# plt.colorbar() +# plt.xlabel(r'$x$') +# plt.ylabel(r'$y$') +# elif mesh.dim == 2: +# S2 = np.zeros(mesh.num_cells) +# for i in range(mesh.num_cells): +# S2[i] = self(c[i, :], c[i, :]) +# mesh.plotFunction(S2, flat=True) +# else: +# raise NotImplementedError() + +# def __mul__(self, twoPointFunction other): +# if isinstance(self, constantTwoPoint) and isinstance(other, constantTwoPoint): +# 
return constantTwoPoint(self.value*other.value) +# elif isinstance(self, parametrizedTwoPointFunction) or isinstance(other, parametrizedTwoPointFunction): +# return productParametrizedTwoPoint(self, other) +# elif isinstance(self, constantTwoPoint) and isinstance(other, (float, REAL)): +# return constantTwoPoint(self.value*other) +# elif isinstance(other, constantTwoPoint) and isinstance(self, (float, REAL)): +# return constantTwoPoint(self*other.value) +# else: +# return productTwoPoint(self, other) cdef class lambdaTwoPoint(twoPointFunction): @@ -138,47 +140,47 @@ cdef class lambdaTwoPoint(twoPointFunction): lambdaTwoPoint.__init__(self, state[0], state[1]) -cdef class productTwoPoint(twoPointFunction): - def __init__(self, twoPointFunction f1, twoPointFunction f2): - super(productTwoPoint, self).__init__(f1.symmetric and f2.symmetric) - self.f1 = f1 - self.f2 = f2 +# cdef class productTwoPoint(twoPointFunction): +# def __init__(self, twoPointFunction f1, twoPointFunction f2): +# super(productTwoPoint, self).__init__(f1.symmetric and f2.symmetric) +# self.f1 = f1 +# self.f2 = f2 - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.f1.eval(x, y)*self.f2.eval(x, y) +# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): +# return self.f1.eval(x, y)*self.f2.eval(x, y) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) +# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): +# return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) - def __repr__(self): - return '{}*{}'.format(self.f1, self.f2) +# def __repr__(self): +# return '{}*{}'.format(self.f1, self.f2) - def __getstate__(self): - return self.f1, self.f2 +# def __getstate__(self): +# return self.f1, self.f2 - def __setstate__(self, state): - productTwoPoint.__init__(self, state[0], state[1]) +# def __setstate__(self, state): +# productTwoPoint.__init__(self, state[0], state[1]) -cdef class 
constantTwoPoint(twoPointFunction): - def __init__(self, REAL_t value): - super(constantTwoPoint, self).__init__(True) - self.value = value +# cdef class constantTwoPoint(twoPointFunction): +# def __init__(self, REAL_t value): +# super(constantTwoPoint, self).__init__(True) +# self.value = value - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.value +# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): +# return self.value - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.value +# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): +# return self.value - def __repr__(self): - return '{}'.format(self.value) +# def __repr__(self): +# return '{}'.format(self.value) - def __getstate__(self): - return self.value +# def __getstate__(self): +# return self.value - def __setstate__(self, state): - constantTwoPoint.__init__(self, state) +# def __setstate__(self, state): +# constantTwoPoint.__init__(self, state) cdef class matrixTwoPoint(twoPointFunction): @@ -518,15 +520,15 @@ cdef class unsymTwoPoint(twoPointFunction): return self.r -cdef class parametrizedTwoPointFunction(twoPointFunction): - def __init__(self, BOOL_t symmetric): - super(parametrizedTwoPointFunction, self).__init__(symmetric) +# cdef class parametrizedTwoPointFunction(twoPointFunction): +# def __init__(self, BOOL_t symmetric): +# super(parametrizedTwoPointFunction, self).__init__(symmetric) - cdef void setParams(self, void *params): - self.params = params +# cdef void setParams(self, void *params): +# self.params = params - cdef void* getParams(self): - return self.params +# cdef void* getParams(self): +# return self.params cdef class inverseTwoPoint(twoPointFunction): @@ -550,33 +552,33 @@ cdef class inverseTwoPoint(twoPointFunction): inverseTwoPoint.__init__(self, state) -cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction): - def __init__(self, twoPointFunction f1, twoPointFunction f2): - super(productParametrizedTwoPoint, 
self).__init__(f1.symmetric and f2.symmetric) - self.f1 = f1 - self.f2 = f2 +# cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction): +# def __init__(self, twoPointFunction f1, twoPointFunction f2): +# super(productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric) +# self.f1 = f1 +# self.f2 = f2 - cdef void setParams(self, void *params): - cdef: - parametrizedTwoPointFunction f - if isinstance(self.f1, parametrizedTwoPointFunction): - f = self.f1 - f.setParams(params) - if isinstance(self.f2, parametrizedTwoPointFunction): - f = self.f2 - f.setParams(params) +# cdef void setParams(self, void *params): +# cdef: +# parametrizedTwoPointFunction f +# if isinstance(self.f1, parametrizedTwoPointFunction): +# f = self.f1 +# f.setParams(params) +# if isinstance(self.f2, parametrizedTwoPointFunction): +# f = self.f2 +# f.setParams(params) - cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): - return self.f1.eval(x, y)*self.f2.eval(x, y) +# cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): +# return self.f1.eval(x, y)*self.f2.eval(x, y) - cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): - return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) +# cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): +# return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) - def __repr__(self): - return '{}*{}'.format(self.f1, self.f2) +# def __repr__(self): +# return '{}*{}'.format(self.f1, self.f2) - def __getstate__(self): - return self.f1, self.f2 +# def __getstate__(self): +# return self.f1, self.f2 - def __setstate__(self, state): - productParametrizedTwoPoint.__init__(self, state[0], state[1]) +# def __setstate__(self, state): +# productParametrizedTwoPoint.__init__(self, state[0], state[1]) diff --git a/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi b/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi new file mode 100644 index 00000000..35d49a04 --- /dev/null +++ 
b/nl/PyNucleus_nl/twoPointFunctions_decl_{SCALAR}.pxi @@ -0,0 +1,34 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class {SCALAR_label}twoPointFunction: + cdef: + public BOOL_t symmetric + cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y) + cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) + + +cdef class {SCALAR_label}productTwoPoint({SCALAR_label}twoPointFunction): + cdef: + public {SCALAR_label}twoPointFunction f1, f2 # templated type: factors must match the scalar variant accepted by __init__ + + +cdef class {SCALAR_label}constantTwoPoint({SCALAR_label}twoPointFunction): + cdef: + public {SCALAR}_t value + + +cdef class {SCALAR_label}parametrizedTwoPointFunction({SCALAR_label}twoPointFunction): + cdef: + void *params + cdef void setParams(self, void *params) + cdef void* getParams(self) + + +cdef class {SCALAR_label}productParametrizedTwoPoint({SCALAR_label}parametrizedTwoPointFunction): + cdef: + public {SCALAR_label}twoPointFunction f1, f2 diff --git a/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi b/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi new file mode 100644 index 00000000..c8da7c35 --- /dev/null +++ b/nl/PyNucleus_nl/twoPointFunctions_{SCALAR}.pxi @@ -0,0 +1,182 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + +cdef class {SCALAR_label}twoPointFunction: + def __init__(self, BOOL_t symmetric): + self.symmetric = symmetric + + def __call__(self, REAL_t[::1] x, REAL_t[::1] y): + return self.eval(x, y) + + cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + raise NotImplementedError() + + cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + raise NotImplementedError() + + def __getstate__(self): + return self.symmetric + + def __setstate__(self, state): + {SCALAR_label}twoPointFunction.__init__(self, state) # templated name: must resolve to the class of this instantiation + + def fixedX(self, REAL_t[::1] x): + return {SCALAR_label}fixedTwoPointFunction(self, x, FIXED_X) # wrapper of the matching scalar variant + + def fixedY(self, REAL_t[::1] y): + return {SCALAR_label}fixedTwoPointFunction(self, y, FIXED_Y) + + def diagonal(self): + return {SCALAR_label}fixedTwoPointFunction(self, None, DIAGONAL) + + def plot(self, mesh, **kwargs): + cdef: + INDEX_t i, j + {SCALAR}_t[:, ::1] S + {SCALAR}_t[::1] S2 + REAL_t[::1] x, y + import matplotlib.pyplot as plt + c = np.array(mesh.getCellCenters()) + if mesh.dim == 1: + X, Y = np.meshgrid(c[:, 0], c[:, 0]) + x = np.empty((mesh.dim), dtype=REAL) # coordinates are real for every scalar variant + y = np.empty((mesh.dim), dtype=REAL) + S = np.zeros((mesh.num_cells, mesh.num_cells), dtype={SCALAR}) # buffer dtype must match the typed memoryview + for i in range(mesh.num_cells): + for j in range(mesh.num_cells): + x[0] = X[i, j] + y[0] = Y[i, j] + S[i, j] = self.eval(x, y) + plt.pcolormesh(X, Y, S, **kwargs) + plt.colorbar() + plt.xlabel(r'$x$') + plt.ylabel(r'$y$') + elif mesh.dim == 2: + S2 = np.zeros(mesh.num_cells, dtype={SCALAR}) + for i in range(mesh.num_cells): + S2[i] = self(c[i, :], c[i, :]) + mesh.plotFunction(S2, flat=True) + else: + raise NotImplementedError() + + def __mul__(self, {SCALAR_label}twoPointFunction other): + if isinstance(self, {SCALAR_label}constantTwoPoint) and isinstance(other, {SCALAR_label}constantTwoPoint): + return {SCALAR_label}constantTwoPoint(self.value*other.value) + elif isinstance(self, {SCALAR_label}parametrizedTwoPointFunction) or isinstance(other,
{SCALAR_label}parametrizedTwoPointFunction): + return {SCALAR_label}productParametrizedTwoPoint(self, other) + elif isinstance(self, {SCALAR_label}constantTwoPoint) and isinstance(other, (float, {SCALAR})): + return {SCALAR_label}constantTwoPoint(self.value*other) + elif isinstance(other, {SCALAR_label}constantTwoPoint) and isinstance(self, (float, {SCALAR})): + return {SCALAR_label}constantTwoPoint(self*other.value) + else: + return {SCALAR_label}productTwoPoint(self, other) + + +cdef class {SCALAR_label}fixedTwoPointFunction({function_type}): + cdef: + {SCALAR_label}twoPointFunction f + REAL_t[::1] point + fixed_type fixedType + + def __init__(self, {SCALAR_label}twoPointFunction f, REAL_t[::1] point, fixed_type fixedType): + self.f = f + self.point = point + self.fixedType = fixedType + + cdef {SCALAR}_t eval(self, REAL_t[::1] x): + if self.fixedType == FIXED_X: + return self.f(self.point, x) + if self.fixedType == FIXED_Y: + return self.f(x, self.point) + else: + return self.f(x, x) + + +cdef class {SCALAR_label}productTwoPoint({SCALAR_label}twoPointFunction): + def __init__(self, {SCALAR_label}twoPointFunction f1, {SCALAR_label}twoPointFunction f2): + super({SCALAR_label}productTwoPoint, self).__init__(f1.symmetric and f2.symmetric) # templated name: super() needs the class actually being defined + self.f1 = f1 + self.f2 = f2 + + cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.f1.eval(x, y)*self.f2.eval(x, y) + + cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) + + def __repr__(self): + return '{}*{}'.format(self.f1, self.f2) + + def __getstate__(self): + return self.f1, self.f2 + + def __setstate__(self, state): + {SCALAR_label}productTwoPoint.__init__(self, state[0], state[1]) + + +cdef class {SCALAR_label}constantTwoPoint({SCALAR_label}twoPointFunction): + def __init__(self, {SCALAR}_t value): + super({SCALAR_label}constantTwoPoint, self).__init__(True) # templated name, see productTwoPoint + self.value = value + + cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return
self.value + + cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return self.value + + def __repr__(self): + return '{}'.format(self.value) + + def __getstate__(self): + return self.value + + def __setstate__(self, state): + {SCALAR_label}constantTwoPoint.__init__(self, state) + + +cdef class {SCALAR_label}parametrizedTwoPointFunction({SCALAR_label}twoPointFunction): + def __init__(self, BOOL_t symmetric): + super({SCALAR_label}parametrizedTwoPointFunction, self).__init__(symmetric) + + cdef void setParams(self, void *params): + self.params = params + + cdef void* getParams(self): + return self.params + + +cdef class {SCALAR_label}productParametrizedTwoPoint({SCALAR_label}parametrizedTwoPointFunction): + def __init__(self, {SCALAR_label}twoPointFunction f1, {SCALAR_label}twoPointFunction f2): + super({SCALAR_label}productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric) + self.f1 = f1 + self.f2 = f2 + + cdef void setParams(self, void *params): + cdef: + {SCALAR_label}parametrizedTwoPointFunction f # local typed with the templated class so it accepts f1 and f2 + if isinstance(self.f1, {SCALAR_label}parametrizedTwoPointFunction): + f = self.f1 + f.setParams(params) + if isinstance(self.f2, {SCALAR_label}parametrizedTwoPointFunction): + f = self.f2 + f.setParams(params) + + cdef {SCALAR}_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.f1.eval(x, y)*self.f2.eval(x, y) + + cdef {SCALAR}_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y) + + def __repr__(self): + return '{}*{}'.format(self.f1, self.f2) + + def __getstate__(self): + return self.f1, self.f2 + + def __setstate__(self, state): + {SCALAR_label}productParametrizedTwoPoint.__init__(self, state[0], state[1]) diff --git a/nl/setup.py b/nl/setup.py index 3a82cd0f..94f3b852 100644 --- a/nl/setup.py +++ b/nl/setup.py @@ -18,6 +18,8 @@ from PyNucleus_packageTools import package except ImportError as e: raise ImportError('\'PyNucleus_packageTools\' needs to be installed first.') from e +from
PyNucleus_packageTools import fillTemplate +from pathlib import Path p = package('PyNucleus_nl') try: @@ -54,6 +56,32 @@ else: remove(p.folder+'bitset.pxd.temp') +print('Generating templates') +templates = [ + 'twoPointFunctions_{SCALAR}.pxi', 'twoPointFunctions_decl_{SCALAR}.pxi', + 'nonlocalLaplacianBase_{SCALAR}.pxi', 'nonlocalLaplacianBase_decl_{SCALAR}.pxi', + 'nonlocalLaplacian_{SCALAR}.pxi', 'nonlocalLaplacian_decl_{SCALAR}.pxi', +] +replacementGroups = [[('{SCALAR}', 'REAL'), + ('{SCALAR_label}', ''), + ('{SCALAR_label_lc}', ''), + ('{SCALAR_label_lc_}', ''), + ('{IS_REAL}', 'True'), + ('{IS_COMPLEX}', 'False'), + ('{function_type}', 'function')], + [('{SCALAR}', 'COMPLEX'), + ('{SCALAR_label}', 'Complex'), + ('{SCALAR_label_lc}', 'complex'), + ('{SCALAR_label_lc_}', 'complex_'), + ('{IS_REAL}', 'False'), + ('{IS_COMPLEX}', 'True'), + ('{function_type}', 'complexFunction'), + # for some reason, complex cannot handle += etc; raw strings keep the regex escapes free of invalid-escape warnings + (r'\s([^\s]+\[[^\]]*\])\s([\*\+-])=', r' \1 = \1 \2'), + (r'\s([^\s]+)\s([\*\+-])=', r' \1 = \1 \2')]] +fillTemplate(Path(p.folder), templates, replacementGroups) + + p.addExtension("bitset", sources=[p.folder+"bitset.pyx"], language='c++') diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelTypeinverseDistance--problempoly-Dirichlet--solverlu--matrixFormatH2 b/tests/cache_runNonlocal.py--domaininterval--kernelTypeinverseDistance--problempoly-Dirichlet--solverlu--matrixFormatH2 index 597ca613..b6e41c6b 100644 --- a/tests/cache_runNonlocal.py--domaininterval--kernelTypeinverseDistance--problempoly-Dirichlet--solverlu--matrixFormatH2 +++ b/tests/cache_runNonlocal.py--domaininterval--kernelTypeinverseDistance--problempoly-Dirichlet--solverlu--matrixFormatH2 @@ -1,9 +1,9 @@ Timers: {} errors: - L2 error interpolated: 2.0677307416367313e-07 - Linf error interpolated: 2.2039557501241092e-07 - relative interpolated L2 error: 1.9793217005902312e-07 - relative interpolated Linf error: 2.2039557501241092e-07 + L2 error interpolated:
3.6453345420885625e-08 + Linf error interpolated: 3.885486987709186e-08 + relative interpolated L2 error: 3.4894725990076044e-08 + relative interpolated Linf error: 3.885486987709186e-08 meshes: {} results: {} vectors: {}