Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve access pattern #66

Merged
merged 4 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
matrix:
version:
- '1.9'
- '1.10'
# - '1.10'
# - 'nightly'
os:
- ubuntu-latest
Expand Down
48 changes: 24 additions & 24 deletions src/MiniKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
const T2 = AbstractArray{T,2} where {T}

# finite differences
@inline _d_xa(A::T, i, j, _dx) where {T<:T2} = (A[i + 1, j] - A[i, j]) * _dx
@inline _d_ya(A::T, i, j, _dy) where {T<:T2} = (A[i, j + 1] - A[i, j]) * _dy
@inline _d_xi(A::T, i, j, _dx) where {T<:T2} = (A[i + 1, j + 1] - A[i, j + 1]) * _dx
@inline _d_yi(A::T, i, j, _dy) where {T<:T2} = (A[i + 1, j + 1] - A[i + 1, j]) * _dy
@inline _d_xa(A::T, i, j, _dx) where {T<:T2} = (-A[i, j] + A[i + 1, j]) * _dx
@inline _d_ya(A::T, i, j, _dy) where {T<:T2} = (-A[i, j] + A[i, j + 1]) * _dy
@inline _d_xi(A::T, i, j, _dx) where {T<:T2} = (-A[i, j + 1] + A[i + 1, j + 1]) * _dx
@inline _d_yi(A::T, i, j, _dy) where {T<:T2} = (-A[i + 1, j] + A[i + 1, j + 1]) * _dy
# averages
@inline _av(A::T, i, j) where {T<:T2} = 0.25 * mysum(A, (i + 1):(i + 2), (j + 1):(j + 2))
@inline _av_a(A::T, i, j) where {T<:T2} = 0.25 * mysum(A, (i):(i + 1), (j):(j + 1))
@inline _av_xa(A::T, i, j) where {T<:T2} = (A[i + 1, j] + A[i, j]) * 0.5
@inline _av_ya(A::T, i, j) where {T<:T2} = (A[i, j + 1] + A[i, j]) * 0.5
@inline _av_xi(A::T, i, j) where {T<:T2} = (A[i + 1, j + 1] + A[i, j + 1]) * 0.5
@inline _av_yi(A::T, i, j) where {T<:T2} = (A[i + 1, j + 1] + A[i + 1, j]) * 0.5
@inline _av_xa(A::T, i, j) where {T<:T2} = (A[i, j] + A[i + 1, j]) * 0.5
@inline _av_ya(A::T, i, j) where {T<:T2} = (A[i, j] + A[i, j + 1]) * 0.5
@inline _av_xi(A::T, i, j) where {T<:T2} = (A[i, j + 1], A[i + 1, j + 1]) * 0.5
@inline _av_yi(A::T, i, j) where {T<:T2} = (A[i + 1, j], A[i + 1, j + 1]) * 0.5
# harmonic averages
@inline function _harm(A::T, i, j) where {T<:T2}
return eltype(A)(4) * mysum(inv, A, (i + 1):(i + 2), (j + 1):(j + 2))
Expand All @@ -32,23 +32,23 @@ end
const T3 = AbstractArray{T,3} where {T}

# finite differences
@inline _d_xa(A::T, i, j, k, _dx) where {T<:T3} = (A[i + 1, j, k] - A[i, j, k]) * _dx
@inline _d_ya(A::T, i, j, k, _dy) where {T<:T3} = (A[i, j + 1, k] - A[i, j, k]) * _dy
@inline _d_za(A::T, i, j, k, _dz) where {T<:T3} = (A[i, j, k + 1] - A[i, j, k]) * _dz
@inline _d_xa(A::T, i, j, k, _dx) where {T<:T3} = (-A[i, j, k] + A[i + 1, j, k]) * _dx
@inline _d_ya(A::T, i, j, k, _dy) where {T<:T3} = (-A[i, j, k] + A[i, j + 1, k]) * _dy
@inline _d_za(A::T, i, j, k, _dz) where {T<:T3} = (-A[i, j, k] + A[i, j, k + 1]) * _dz
@inline function _d_xi(A::T, i, j, k, _dx) where {T<:T3}
return (A[i + 1, j + 1, k + 1] - A[i, j + 1, k + 1]) * _dx
return (-A[i, j + 1, k + 1] + A[i + 1, j + 1, k + 1]) * _dx
end
@inline function _d_yi(A::T, i, j, k, _dy) where {T<:T3}
return (A[i + 1, j + 1, k + 1] - A[i + 1, j, k + 1]) * _dy
return (-A[i + 1, j, k + 1] + A[i + 1, j + 1, k + 1]) * _dy
end
@inline function _d_zi(A::T, i, j, k, _dz) where {T<:T3}
return (A[i + 1, j + 1, k + 1] - A[i + 1, j + 1, k]) * _dz
return (-A[i + 1, j + 1, k] + A[i + 1, j + 1, k + 1]) * _dz
end
# averages
@inline _av(A::T, i, j, k) where {T<:T3} = 0.125 * mysum(A, i:(i + 1), j:(j + 1), k:(k + 1))
@inline _av_x(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i + 1, j, k] + A[i, j, k])
@inline _av_y(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j + 1, k] + A[i, j, k])
@inline _av_z(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k + 1] + A[i, j, k])
@inline _av_x(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i + 1, j, k])
@inline _av_y(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i, j + 1, k])
@inline _av_z(A::T, i, j, k) where {T<:T3} = 0.5 * (A[i, j, k] + A[i, j, k + 1])
@inline _av_xy(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:(i + 1), j:(j + 1), k:k)
@inline _av_xz(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:(i + 1), j:j, k:(k + 1))
@inline _av_yz(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:i, j:(j + 1), k:(k + 1))
Expand All @@ -57,13 +57,13 @@ end
@inline _av_yzi(A::T, i, j, k) where {T<:T3} = 0.25 * mysum(A, i:i, (j - 1):j, (k - 1):k)
# harmonic averages
@inline function _harm_x(A::T, i, j, k) where {T<:T3}
return eltype(A)(2) * inv(inv(A[i + 1, j, k]) + inv(A[i, j, k]))
return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i + 1, j, k]))
end
@inline function _harm_y(A::T, i, j, k) where {T<:T3}
return eltype(A)(2) * inv(inv(A[i, j + 1, k]) + inv(A[i, j, k]))
return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i, j + 1, k]))
end
@inline function _harm_z(A::T, i, j, k) where {T<:T3}
return eltype(A)(2) * inv(inv(A[i, j, k + 1]) + inv(A[i, j, k]))
return eltype(A)(2) * inv(inv(A[i, j, k]) + inv(A[i, j, k + 1]))
end
@inline function _harm_xy(A::T, i, j, k) where {T<:T3}
return eltype(A)(4) * inv(mysum(A, i:(i + 1), j:(j + 1), k:k))
Expand Down Expand Up @@ -97,25 +97,25 @@ end
@inline _current(A::T, i, j, k) where {T<:T3} = A[i, j, k]

## Because mysum(::generator) does not work inside CUDA kernels...
mysum(A, ranges::Vararg{T,N}) where {T,N} = mysum(identity, A, ranges...)
@inline mysum(A, ranges::Vararg{T,N}) where {T,N} = mysum(identity, A, ranges...)

function mysum(f::F, A, ranges_i) where {F<:Function}
@inline function mysum(f::F, A, ranges_i) where {F<:Function}
s = 0.0
for i in ranges_i
s += f(A[i])
end
return s
end

function mysum(f::F, A, ranges_i, ranges_j) where {F<:Function}
@inline function mysum(f::F, A, ranges_i, ranges_j) where {F<:Function}
s = 0.0
for i in ranges_i, j in ranges_j
s += f(A[i, j])
end
return s
end

function mysum(f::F, A, ranges_i, ranges_j, ranges_k) where {F<:Function}
@inline function mysum(f::F, A, ranges_i, ranges_j, ranges_k) where {F<:Function}
s = 0.0
for i in ranges_i, j in ranges_j, k in ranges_k
s += f(A[i, j, k])
Expand Down
11 changes: 6 additions & 5 deletions src/rheology/StressUpdate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,13 @@ function plastic_params_phase(
is_pl = false
C = sinϕ = cosϕ = sinψ = η_reg = 0.0
for n in 1:N
ratio_n = ratio[n]
data[n][1] && (is_pl = true)
C += data[n][2] * ratio[n]
sinϕ += data[n][3] * ratio[n]
cosϕ += data[n][4] * ratio[n]
sinψ += data[n][5] * ratio[n]
η_reg += data[n][6] * ratio[n]
C += data[n][2] * ratio_n
sinϕ += data[n][3] * ratio_n
cosϕ += data[n][4] * ratio_n
sinψ += data[n][5] * ratio_n
η_reg += data[n][6] * ratio_n
end
return is_pl, C, sinϕ, cosϕ, sinψ, η_reg
end
Expand Down
24 changes: 16 additions & 8 deletions src/rheology/Viscosity.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
@inline gather(A) = _gather(A, I...)

@inbounds begin
# cache
ε = εxx[I...], εyy[I...]

# we need strain rate not to be zero, otherwise we get NaNs
εII_0 = allzero(εxx[I...], εyy[I...]) * 1e-15
εII_0 = allzero(ε...) * 1e-15

# argument fields at local index
args_ij = local_viscosity_args(args, I...)

# compute second invariant of strain rate tensor
εij = εII_0 + εxx[I...], -εII_0 + εyy[I...], gather(εxyv)
εij = εII_0 + ε[1], -εII_0 + ε[1], gather(εxyv)
εII = second_invariant(εij...)

# compute and update stress viscosity
Expand Down Expand Up @@ -52,8 +55,11 @@ end
@inline gather(A) = _gather(A, I...)

@inbounds begin
# cache
ε = εxx[I...], εyy[I...]

# we need strain rate not to be zero, otherwise we get NaNs
εII_0 = allzero(εxx[I...], εyy[I...]) * 1e-18
εII_0 = allzero(ε...) * 1e-18

# argument fields at local index
args_ij = local_viscosity_args(args, I...)
Expand All @@ -62,7 +68,7 @@ end
ratio_ij = ratios_center[I...]

# compute second invariant of strain rate tensor
εij = εII_0 + εxx[I...], -εII_0 + εyy[I...], gather(εxyv)
εij = εII_0 + ε[1], -εII_0 + ε[1], gather(εxyv)
εII = second_invariant(εij...)

# compute and update stress viscosity
Expand All @@ -86,14 +92,15 @@ end
@inline gather_xy(A) = _gather_xy(A, I...)

@inbounds begin
εij_normal = εxx[I...], εyy[I...], εzz[I...]

# we need strain rate not to be zero, otherwise we get NaNs
εII_0 = allzero(εxx[I...], εyy[I...], εzz[I...]) * 1e-18
εII_0 = allzero(εij_normal...) * 1e-18

# # argument fields at local index
args_ijk = local_viscosity_args(args, I...)

# compute second invariant of strain rate tensor
εij_normal = εxx[I...], εyy[I...], εzz[I...]
εij_normal = εij_normal .+ (εII_0, -εII_0 * 0.5, -εII_0 * 0.5)
εij_shear = gather_yz(εyzv), gather_xz(εxzv), gather_xy(εxyv)
εij = (εij_normal..., εij_shear...)
Expand Down Expand Up @@ -135,8 +142,10 @@ end
@inline gather_xy(A) = _gather_xy(A, I...)

@inbounds begin
εij_normal = εxx[I...], εyy[I...], εzz[I...]

# we need strain rate not to be zero, otherwise we get NaNs
εII_0 = allzero(εxx[I...], εyy[I...], εzz[I...]) * 1e-18
εII_0 = allzero(εij_normal...) * 1e-18

# # argument fields at local index
args_ijk = local_viscosity_args(args, I...)
Expand All @@ -145,7 +154,6 @@ end
ratio_ijk = ratios_center[I...]

# compute second invariant of strain rate tensor
εij_normal = εxx[I...], εyy[I...], εzz[I...]
εij_normal = εij_normal .+ (εII_0, -εII_0 * 0.5, -εII_0 * 0.5)
εij_shear = gather_yz(εyzv), gather_xz(εxzv), gather_xy(εxyv)
εij = (εij_normal..., εij_shear...)
Expand Down
2 changes: 1 addition & 1 deletion src/stokes/StressKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# Shear components
if all((i, j) .< size(τxy) .- 1)
τxy[i + 1, j + 1] +=
(-τxy[i + 1, j + 1] + 2.0 * @av(η) * εxy[i + 1, j + 1]) * denominator
(-τxy[i + 1, j + 1] + 2.0 * av(η) * εxy[i + 1, j + 1]) * denominator
end

return nothing
Expand Down