33 periodic_box = nothing, update_strategy = nothing,
44 update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(),
55 backend = DynamicVectorOfVectors{Int32},
6- max_neighbors = max_neighbors(NDIMS),
6+ transpose_backend = false,
7+ max_neighbors = max_neighbors(NDIMS),
78 sort_neighbor_lists = true)
89
910Neighborhood search with precomputed neighbor lists. A list of all neighbors is computed
@@ -23,6 +24,8 @@ to strip the internal neighborhood search, which is not needed anymore.
2324# Keywords
2425- `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together
2526 with [`copy_neighborhood_search`](@ref).
27+ Note that the type of `search_radius` determines the type used
28+ for the distance computations.
2629- `n_points = 0`: Total number of points. The default of `0` is useful together
2730 with [`copy_neighborhood_search`](@ref).
2831- `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a
@@ -42,6 +45,18 @@ to strip the internal neighborhood search, which is not needed anymore.
4245 - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
4346 - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
4447 and GPU-compatible.
48+ - `transpose_backend = false`: Whether to transpose the backend data structure storing the
49+ neighbor lists. This is only supported for the
50+ `DynamicVectorOfVectors` backend.
51+ By default, the neighbors of each point are stored contiguously
52+ in memory. This layout optimizes cache hits when looping
53+ over all neighbors of a point on CPUs.
54+ On GPUs, however, storing all first neighbors of all points
55+ contiguously in memory, then all second neighbors, etc.,
56+ (`transpose_backend = true`) allows for coalesced
57+ memory accesses when all threads process the n-th neighbor
58+ of their respective point in parallel.
59+ This can lead to a speedup of ~3x in many cases.
4560- `max_neighbors`: Maximum number of neighbors per point. This will be used to
4661 allocate the `DynamicVectorOfVectors`. It is not used with
4762 other backends. The default is 64 in 2D and 324 in 3D.
@@ -80,9 +95,10 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points =
8095 periodic_box,
8196 update_strategy),
8297 backend = DynamicVectorOfVectors{Int32},
98+ transpose_backend = false ,
8399 max_neighbors = max_neighbors (NDIMS),
84100 sort_neighbor_lists = true ) where {NDIMS}
85- neighbor_lists = construct_backend (backend, n_points, max_neighbors)
101+ neighbor_lists = construct_backend (backend, n_points, max_neighbors; transpose_backend )
86102
87103 PrecomputedNeighborhoodSearch {NDIMS} (neighbor_lists, search_radius,
88104 periodic_box, update_neighborhood_search,
@@ -225,10 +241,12 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
225241 # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback.
226242 # This should never be used because this backend doesn't require a `max_neighbors`.
227243 max_neighbors_ = max_inner_length (nhs. neighbor_lists, max_neighbors (ndims (nhs)))
244+ transpose_backend = transposed_backend (nhs. neighbor_lists)
228245 return PrecomputedNeighborhoodSearch {ndims(nhs)} (; search_radius, n_points,
229246 periodic_box = nhs. periodic_box,
230247 update_neighborhood_search,
231248 backend = typeof (nhs. neighbor_lists),
249+ transpose_backend,
232250 max_neighbors = max_neighbors_)
233251end
234252
0 commit comments