Skip to content

Commit 9f86e3e

Browse files
efaulhabersvchb
andauthored
Implement option to transpose neighbor lists of PrecomputedNeighborhoodSearch (#137)
* Implement option to transpose neighbor lists of `PrecomputedNeighborhoodSearch` * Improve docs * Improve error * Reformat --------- Co-authored-by: Sven Berger <berger.sven@gmail.com>
1 parent d8b0de2 commit 9f86e3e

File tree

8 files changed

+98
-14
lines changed

8 files changed

+98
-14
lines changed

src/PointNeighbors.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ include("gpu.jl")
2323
export foreach_point_neighbor, foreach_neighbor
2424
export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch
2525
export DictionaryCellList, FullGridCellList, SpatialHashingCellList
26+
export DynamicVectorOfVectors
2627
export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate,
2728
ParallelIncrementalUpdate
2829
export requires_update

src/cell_lists/cell_lists.jl

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,20 @@ abstract type AbstractCellList end
1111
end
1212

1313
function construct_backend(::Type{Vector{Vector{T}}},
14-
max_outer_length,
15-
max_inner_length) where {T}
14+
max_outer_length, max_inner_length;
15+
transpose_backend = false) where {T}
16+
if transpose_backend
17+
error("transpose backend is only supported for DynamicVectorOfVectors backend")
18+
end
19+
1620
return [T[] for _ in 1:max_outer_length]
1721
end
1822

1923
function construct_backend(::Type{DynamicVectorOfVectors{T}},
20-
max_outer_length,
21-
max_inner_length) where {T}
22-
cells = DynamicVectorOfVectors{T}(max_outer_length = max_outer_length,
23-
max_inner_length = max_inner_length)
24+
max_outer_length, max_inner_length;
25+
transpose_backend = false) where {T}
26+
cells = DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length,
27+
transpose_backend)
2428
resize!(cells, max_outer_length)
2529

2630
return cells
@@ -30,10 +34,11 @@ end
3034
# `DynamicVectorOfVectors{T}`, but a type `DynamicVectorOfVectors{T1, T2, T3, T4}`.
3135
# While `A{T} <: A{T1, T2}`, this doesn't hold for the types.
3236
# `Type{A{T}} <: Type{A{T1, T2}}` is NOT true.
33-
function construct_backend(::Type{DynamicVectorOfVectors{T1, T2, T3, T4}}, max_outer_length,
34-
max_inner_length) where {T1, T2, T3, T4}
37+
function construct_backend(::Type{DynamicVectorOfVectors{T1, T2, T3, T4}},
38+
max_outer_length, max_inner_length;
39+
transpose_backend = false) where {T1, T2, T3, T4}
3540
return construct_backend(DynamicVectorOfVectors{T1}, max_outer_length,
36-
max_inner_length)
41+
max_inner_length; transpose_backend)
3742
end
3843

3944
function max_points_per_cell(cells::DynamicVectorOfVectors)

src/nhs_grid.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ since not sorting makes our implementation a lot faster (although less paralleli
3030
# Keywords
3131
- `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together
3232
with [`copy_neighborhood_search`](@ref).
33+
Note that the type of `search_radius` determines the type used
34+
for the distance computations.
3335
- `n_points = 0`: Total number of points. The default of `0` is useful together
3436
with [`copy_neighborhood_search`](@ref).
3537
- `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a

src/nhs_precomputed.jl

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
periodic_box = nothing, update_strategy = nothing,
44
update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(),
55
backend = DynamicVectorOfVectors{Int32},
6-
max_neighbors = max_neighbors(NDIMS),
6+
transpose_backend = false,
7+
max_neighbors = max_neighbors(NDIMS))
78
sort_neighbor_lists = true)
89
910
Neighborhood search with precomputed neighbor lists. A list of all neighbors is computed
@@ -23,6 +24,8 @@ to strip the internal neighborhood search, which is not needed anymore.
2324
# Keywords
2425
- `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together
2526
with [`copy_neighborhood_search`](@ref).
27+
Note that the type of `search_radius` determines the type used
28+
for the distance computations.
2629
- `n_points = 0`: Total number of points. The default of `0` is useful together
2730
with [`copy_neighborhood_search`](@ref).
2831
- `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a
@@ -42,6 +45,18 @@ to strip the internal neighborhood search, which is not needed anymore.
4245
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
4346
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
4447
and GPU-compatible.
48+
- `transpose_backend = false`: Whether to transpose the backend data structure storing the
49+
neighbor lists. This is only supported for the
50+
`DynamicVectorOfVectors` backend.
51+
By default, the neighbors of each point are stored contiguously
52+
in memory. This layout optimizes cache hits when looping
53+
over all neighbors of a point on CPUs.
54+
On GPUs, however, storing all first neighbors of all points
55+
contiguously in memory, then all second neighbors, etc.,
56+
(`transpose_backend = true`) allows for coalesced
57+
memory accesses when all threads process the n-th neighbor
58+
of their respective point in parallel.
59+
This can lead to a speedup of ~3x in many cases.
4560
- `max_neighbors`: Maximum number of neighbors per particle. This will be used to
4661
allocate the `DynamicVectorOfVectors`. It is not used with
4762
other backends. The default is 64 in 2D and 324 in 3D.
@@ -80,9 +95,10 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points =
8095
periodic_box,
8196
update_strategy),
8297
backend = DynamicVectorOfVectors{Int32},
98+
transpose_backend = false,
8399
max_neighbors = max_neighbors(NDIMS),
84100
sort_neighbor_lists = true) where {NDIMS}
85-
neighbor_lists = construct_backend(backend, n_points, max_neighbors)
101+
neighbor_lists = construct_backend(backend, n_points, max_neighbors; transpose_backend)
86102

87103
PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
88104
periodic_box, update_neighborhood_search,
@@ -225,10 +241,12 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
225241
# For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback.
226242
# This should never be used because this backend doesn't require a `max_neighbors`.
227243
max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs)))
244+
transpose_backend = transposed_backend(nhs.neighbor_lists)
228245
return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points,
229246
periodic_box = nhs.periodic_box,
230247
update_neighborhood_search,
231248
backend = typeof(nhs.neighbor_lists),
249+
transpose_backend,
232250
max_neighbors = max_neighbors_)
233251
end
234252

src/nhs_trivial.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Trivial neighborhood search that simply loops over all points.
1010
# Keywords
1111
- `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together
1212
with [`copy_neighborhood_search`](@ref).
13+
Note that the type of `search_radius` determines the type used
14+
for the distance computations.
1315
- `eachpoint = 1:0`: Iterator for all point indices. Usually just `1:n_points`.
1416
The default of `1:0` is useful together with
1517
[`copy_neighborhood_search`](@ref).

src/vector_of_vectors.jl

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,17 @@ struct DynamicVectorOfVectors{T, ARRAY2D, ARRAY1D, L} <: AbstractVector{Array{T,
1313
end
1414
end
1515

16-
function DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length) where {T}
17-
backend = Array{T, 2}(undef, max_inner_length, max_outer_length)
16+
function DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length,
17+
transpose_backend = false) where {T}
18+
if transpose_backend
19+
# Create a row-major array
20+
backend_ = Array{T, 2}(undef, max_outer_length, max_inner_length)
21+
# Wrap in a `PermutedDimsArray` to obtain the usual column-major access,
22+
# even though the underlying memory layout is row-major.
23+
backend = PermutedDimsArray(backend_, (2, 1))
24+
else
25+
backend = Array{T, 2}(undef, max_inner_length, max_outer_length)
26+
end
1827
length_ = Ref(zero(Int32))
1928
lengths = zeros(Int32, max_outer_length)
2029

@@ -203,6 +212,14 @@ function max_inner_length(cells::DynamicVectorOfVectors, fallback)
203212
end
204213

205214
# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list.
206-
function max_inner_length(::Any, fallback)
215+
@inline function max_inner_length(::Any, fallback)
207216
return fallback
208217
end
218+
219+
@inline function transposed_backend(::DynamicVectorOfVectors{<:Any, <:PermutedDimsArray})
220+
return true
221+
end
222+
223+
@inline function transposed_backend(::Any)
224+
return false
225+
end

test/nhs_precomputed.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
@testset verbose=true "PrecomputedNeighborhoodSearch" begin
2+
# Test regular vs transposed backend
3+
nhs = PrecomputedNeighborhoodSearch{2}(transpose_backend = false, n_points = 2)
4+
5+
# Add test neighbors
6+
neighbor_lists = nhs.neighbor_lists
7+
@test PointNeighbors.transposed_backend(neighbor_lists) == false
8+
neighbor_lists.backend .= 0
9+
PointNeighbors.pushat!(neighbor_lists, 1, 101)
10+
PointNeighbors.pushat!(neighbor_lists, 1, 102)
11+
PointNeighbors.pushat!(neighbor_lists, 2, 201)
12+
PointNeighbors.pushat!(neighbor_lists, 2, 202)
13+
PointNeighbors.pushat!(neighbor_lists, 2, 203)
14+
15+
# Check that neighbors are next to each other in memory
16+
pointer_ = pointer(neighbor_lists.backend)
17+
@test unsafe_load(pointer_, 1) == 101
18+
@test unsafe_load(pointer_, 2) == 102
19+
20+
# Transposed backend
21+
nhs = PrecomputedNeighborhoodSearch{2}(transpose_backend = true, n_points = 2)
22+
23+
# Add test neighbors
24+
neighbor_lists = nhs.neighbor_lists
25+
@test PointNeighbors.transposed_backend(neighbor_lists) == true
26+
neighbor_lists.backend .= 0
27+
PointNeighbors.pushat!(neighbor_lists, 1, 101)
28+
PointNeighbors.pushat!(neighbor_lists, 1, 102)
29+
PointNeighbors.pushat!(neighbor_lists, 2, 201)
30+
PointNeighbors.pushat!(neighbor_lists, 2, 202)
31+
PointNeighbors.pushat!(neighbor_lists, 2, 203)
32+
33+
# Check that first neighbors are next to each other in memory
34+
@test neighbor_lists.backend isa PermutedDimsArray
35+
pointer_ = pointer(neighbor_lists.backend.parent)
36+
@test unsafe_load(pointer_, 1) == 101
37+
@test unsafe_load(pointer_, 2) == 201
38+
end

test/unittest.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
include("vector_of_vectors.jl")
55
include("nhs_trivial.jl")
66
include("nhs_grid.jl")
7+
include("nhs_precomputed.jl")
78
include("neighborhood_search.jl")
89
include("cell_lists/full_grid.jl")
910
end;

0 commit comments

Comments
 (0)