diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0873578d..294aa28e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: matrix: version: - '1.9' - - '1.10' + # - '1.10' # - 'nightly' os: - ubuntu-latest diff --git a/src/MiniKernels.jl b/src/MiniKernels.jl index a6ab56d7..edadf25d 100644 --- a/src/MiniKernels.jl +++ b/src/MiniKernels.jl @@ -2,17 +2,17 @@ const T2 = AbstractArray{T,2} where {T} # finite differences -@inline _d_xa(A::T, i, j, _dx) where {T<:T2} = (A[i + 1, j] - A[i, j]) * _dx -@inline _d_ya(A::T, i, j, _dy) where {T<:T2} = (A[i, j + 1] - A[i, j]) * _dy -@inline _d_xi(A::T, i, j, _dx) where {T<:T2} = (A[i + 1, j + 1] - A[i, j + 1]) * _dx -@inline _d_yi(A::T, i, j, _dy) where {T<:T2} = (A[i + 1, j + 1] - A[i + 1, j]) * _dy +@inline _d_xa(A::T, i, j, _dx) where {T<:T2} = (-A[i, j] + A[i + 1, j]) * _dx +@inline _d_ya(A::T, i, j, _dy) where {T<:T2} = (-A[i, j] + A[i, j + 1]) * _dy +@inline _d_xi(A::T, i, j, _dx) where {T<:T2} = (-A[i, j + 1] + A[i + 1, j + 1]) * _dx +@inline _d_yi(A::T, i, j, _dy) where {T<:T2} = (-A[i + 1, j] + A[i + 1, j + 1]) * _dy # averages @inline _av(A::T, i, j) where {T<:T2} = 0.25 * mysum(A, (i + 1):(i + 2), (j + 1):(j + 2)) @inline _av_a(A::T, i, j) where {T<:T2} = 0.25 * mysum(A, (i):(i + 1), (j):(j + 1)) -@inline _av_xa(A::T, i, j) where {T<:T2} = (A[i + 1, j] + A[i, j]) * 0.5 -@inline _av_ya(A::T, i, j) where {T<:T2} = (A[i, j + 1] + A[i, j]) * 0.5 -@inline _av_xi(A::T, i, j) where {T<:T2} = (A[i + 1, j + 1] + A[i, j + 1]) * 0.5 -@inline _av_yi(A::T, i, j) where {T<:T2} = (A[i + 1, j + 1] + A[i + 1, j]) * 0.5 +@inline _av_xa(A::T, i, j) where {T<:T2} = (A[i, j] + A[i + 1, j]) * 0.5 +@inline _av_ya(A::T, i, j) where {T<:T2} = (A[i, j] + A[i, j + 1]) * 0.5 +@inline _av_xi(A::T, i, j) where {T<:T2} = (A[i, j + 1], A[i + 1, j + 1]) * 0.5 +@inline _av_yi(A::T, i, j) where {T<:T2} = (A[i + 1, j], A[i + 1, j + 1]) * 0.5 # harmonic averages @inline function _harm(A::T, i, j) where {T<:T2} return eltype(A)(4) * mysum(inv, A, (i + 1):(i + 2), (j + 1):(j + 2)) @@ -32,23 +32,23 @@ end const T3 = AbstractArray{T,3} where {T} # finite differences -@inline _d_xa(A::T, i, j, k, _dx) where {T<:T3} = (A[i + 1, j, k] - A[i, j, k]) * _dx -@inline _d_ya(A::T, i, j, k, _dy) where {T<:T3} = (A[i, j + 1, k] - A[i, j, k]) * _dy -@inline _d_za(A::T, i, j, k, _dz) where {T<:T3} = (A[i, j, k + 1] - A[i, j, k]) * _dz +@inline _d_xa(A::T, i, j, k, _dx) where {T<:T3} = (-A[i, j, k] + A[i + 1, j, k]) * _dx +@inline _d_ya(A::T, i, j, k, _dy) where {T<:T3} = (-A[i, j, k] + A[i, j + 1, k]) * _dy +@inline _d_za(A::T, i, j, k, _dz) where {T<:T3} = (-A[i, j, k] + A[i, j, k + 1]) * _dz @inline function _d_xi(A::T, i, j, k, _dx) where {T<:T3} - return (A[i + 1, j + 1, k + 1] - A[i, j + 1, k + 1]) * _dx + return (-A[i, j + 1, k + 1] + A[i + 1, j + 1, k + 1]) * _dx end @inline function _d_yi(A::T, i, j, k, _dy) where {T<:T3} - return (A[i + 1, j + 1, k + 1] - A[i + 1, j, k + 1]) * _dy + return (-A[i + 1, j, k + 1] + A[i + 1, j + 1, k + 1]) * _dy end @inline function _d_zi(A::T, i, j, k, _dz) where {T<:T3} - return (A[i + 1, j + 1, k + 1] - A[i + 1, j + 1, k]) * _dz + return (-A[i + 1, j + 1, k] + A[i + 1, j + 1, k + 1]) * _dz end # averages @inline _av(A::T, i, j, k) where {T<:T3} = 0.125 * mysum(A, i:(i + 1), j:(j + 1), k:(k + 1)) -@inline _av_x(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i + 1, j, k] + A[i, j, k]) -@inline _av_y(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j + 1, k] + A[i, j, k]) -@inline _av_z(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k + 1] + A[i, j, k]) +@inline _av_x(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i + 1, j, k]) +@inline _av_y(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i, j + 1, k]) +@inline _av_z(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i, j, k + 1]) @inline _av_xy(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:(i + 1), j:(j + 1), k:k) @inline _av_xz(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:(i + 1), j:j, k:(k + 1)) @inline _av_yz(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:i, j:(j + 1), k:(k + 1)) @@ -57,13 +57,13 @@ end @inline _av_yzi(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:i, (j - 1):j, (k - 1):k) # harmonic averages @inline function _harm_x(A::T, i, j, k) where {T<:T3} - return eltype(A)(2) * inv(inv(A[i + 1, j, k]) + inv(A[i, j, k])) + return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i + 1, j, k])) end @inline function _harm_y(A::T, i, j, k) where {T<:T3} - return eltype(A)(2) * inv(inv(A[i, j + 1, k]) + inv(A[i, j, k])) + return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i, j + 1, k])) end @inline function _harm_z(A::T, i, j, k) where {T<:T3} - return eltype(A)(2) * inv(inv(A[i, j, k + 1]) + inv(A[i, j, k])) + return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i, j, k + 1])) end @inline function _harm_xy(A::T, i, j, k) where {T<:T3} return eltype(A)(4) * inv(mysum(A, i:(i + 1), j:(j + 1), k:k)) @@ -97,9 +97,9 @@ end @inline _current(A::T, i, j, k) where {T<:T3} = A[i, j, k] ## Because mysum(::generator) does not work inside CUDA kernels... -mysum(A, ranges::Vararg{T,N}) where {T,N} = mysum(identity, A, ranges...) +@inline mysum(A, ranges::Vararg{T,N}) where {T,N} = mysum(identity, A, ranges...) -function mysum(f::F, A, ranges_i) where {F<:Function} +@inline function mysum(f::F, A, ranges_i) where {F<:Function} s = 0.0 for i in ranges_i s += f(A[i]) @@ -107,7 +107,7 @@ function mysum(f::F, A, ranges_i) where {F<:Function} return s end -function mysum(f::F, A, ranges_i, ranges_j) where {F<:Function} +@inline function mysum(f::F, A, ranges_i, ranges_j) where {F<:Function} s = 0.0 for i in ranges_i, j in ranges_j s += f(A[i, j]) @@ -115,7 +115,7 @@ function mysum(f::F, A, ranges_i, ranges_j) where {F<:Function} return s end -function mysum(f::F, A, ranges_i, ranges_j, ranges_k) where {F<:Function} +@inline function mysum(f::F, A, ranges_i, ranges_j, ranges_k) where {F<:Function} s = 0.0 for i in ranges_i, j in ranges_j, k in ranges_k s += f(A[i, j, k]) diff --git a/src/rheology/StressUpdate.jl b/src/rheology/StressUpdate.jl index 373c8452..17e3c1cc 100644 --- a/src/rheology/StressUpdate.jl +++ b/src/rheology/StressUpdate.jl @@ -126,12 +126,13 @@ function plastic_params_phase( is_pl = false C = sinϕ = cosϕ = sinψ = η_reg = 0.0 for n in 1:N + ratio_n = ratio[n] data[n][1] && (is_pl = true) - C += data[n][2] * ratio[n] - sinϕ += data[n][3] * ratio[n] - cosϕ += data[n][4] * ratio[n] - sinψ += data[n][5] * ratio[n] - η_reg += data[n][6] * ratio[n] + C += data[n][2] * ratio_n + sinϕ += data[n][3] * ratio_n + cosϕ += data[n][4] * ratio_n + sinψ += data[n][5] * ratio_n + η_reg += data[n][6] * ratio_n end return is_pl, C, sinϕ, cosϕ, sinψ, η_reg end diff --git a/src/rheology/Viscosity.jl b/src/rheology/Viscosity.jl index 6fb0799c..3d9c42af 100644 --- a/src/rheology/Viscosity.jl +++ b/src/rheology/Viscosity.jl @@ -8,14 +8,17 @@ @inline gather(A) = _gather(A, I...) @inbounds begin + # cache + ε = εxx[I...], εyy[I...] + # we need strain rate not to be zero, otherwise we get NaNs - εII_0 = allzero(εxx[I...], εyy[I...]) * 1e-15 + εII_0 = allzero(ε...) * 1e-15 # argument fields at local index args_ij = local_viscosity_args(args, I...) # compute second invariant of strain rate tensor - εij = εII_0 + εxx[I...], -εII_0 + εyy[I...], gather(εxyv) + εij = εII_0 + ε[1], -εII_0 + ε[1], gather(εxyv) εII = second_invariant(εij...) # compute and update stress viscosity @@ -52,8 +55,11 @@ end @inline gather(A) = _gather(A, I...) @inbounds begin + # cache + ε = εxx[I...], εyy[I...] + # we need strain rate not to be zero, otherwise we get NaNs - εII_0 = allzero(εxx[I...], εyy[I...]) * 1e-18 + εII_0 = allzero(ε...) * 1e-18 # argument fields at local index args_ij = local_viscosity_args(args, I...) @@ -62,7 +68,7 @@ end ratio_ij = ratios_center[I...] # compute second invariant of strain rate tensor - εij = εII_0 + εxx[I...], -εII_0 + εyy[I...], gather(εxyv) + εij = εII_0 + ε[1], -εII_0 + ε[1], gather(εxyv) εII = second_invariant(εij...) # compute and update stress viscosity @@ -86,14 +92,15 @@ end @inline gather_xy(A) = _gather_xy(A, I...) @inbounds begin + εij_normal = εxx[I...], εyy[I...], εzz[I...] + # we need strain rate not to be zero, otherwise we get NaNs - εII_0 = allzero(εxx[I...], εyy[I...], εzz[I...]) * 1e-18 + εII_0 = allzero(εij_normal...) * 1e-18 # # argument fields at local index args_ijk = local_viscosity_args(args, I...) # compute second invariant of strain rate tensor - εij_normal = εxx[I...], εyy[I...], εzz[I...] εij_normal = εij_normal .+ (εII_0, -εII_0 * 0.5, -εII_0 * 0.5) εij_shear = gather_yz(εyzv), gather_xz(εxzv), gather_xy(εxyv) εij = (εij_normal..., εij_shear...) @@ -135,8 +142,10 @@ end @inline gather_xy(A) = _gather_xy(A, I...) @inbounds begin + εij_normal = εxx[I...], εyy[I...], εzz[I...] + # we need strain rate not to be zero, otherwise we get NaNs - εII_0 = allzero(εxx[I...], εyy[I...], εzz[I...]) * 1e-18 + εII_0 = allzero(εij_normal...) * 1e-18 # # argument fields at local index args_ijk = local_viscosity_args(args, I...) @@ -145,7 +154,6 @@ end ratio_ijk = ratios_center[I...] # compute second invariant of strain rate tensor - εij_normal = εxx[I...], εyy[I...], εzz[I...] εij_normal = εij_normal .+ (εII_0, -εII_0 * 0.5, -εII_0 * 0.5) εij_shear = gather_yz(εyzv), gather_xz(εxzv), gather_xy(εxyv) εij = (εij_normal..., εij_shear...) diff --git a/src/stokes/StressKernels.jl b/src/stokes/StressKernels.jl index 3ac47590..93c42fc8 100644 --- a/src/stokes/StressKernels.jl +++ b/src/stokes/StressKernels.jl @@ -14,7 +14,7 @@ # Shear components if all((i, j) .< size(τxy) .- 1) τxy[i + 1, j + 1] += - (-τxy[i + 1, j + 1] + 2.0 * @av(η) * εxy[i + 1, j + 1]) * denominator + (-τxy[i + 1, j + 1] + 2.0 * av(η) * εxy[i + 1, j + 1]) * denominator end return nothing