Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
5a4f31a
Implemented MeshManager level versions of get_surface_vertices() + ge…
Waqar-ukaea Feb 24, 2026
1c39076
Refactored get_volume_vertices() and get_volume_connectivity() to be …
Waqar-ukaea Feb 24, 2026
a17d5db
Some more cleanup
Waqar-ukaea Feb 25, 2026
c89e498
Refactored tests for MOABMeshManager::get_surface_connectivity() and …
Waqar-ukaea Feb 6, 2026
af11f4a
Added method to return dense array of a volume's connectivity indices
Waqar-ukaea Feb 10, 2026
4d080ad
Started on implementing volumetric element tree BVH construction in GPRT
Waqar-ukaea Feb 11, 2026
3eeaab4
Restructured slang shaders in preparation for new tet shaders
Waqar-ukaea Feb 11, 2026
afb6334
Added the required shaders for find_element on GPU
Waqar-ukaea Feb 13, 2026
af36622
Added rest of plumbing to make find_element work with GPRT and 1 ray
Waqar-ukaea Feb 19, 2026
63a57c6
Extended find_element test to also test with GPRT backend
Waqar-ukaea Feb 19, 2026
56c2987
Added a new slang __generic that populates AABBs for objects of arbit…
Waqar-ukaea Feb 19, 2026
874c389
Changed a constant to be static const to be slang compliant
Waqar-ukaea Feb 26, 2026
ac2b0f8
Made plucker_tet_containment code cross-compatible between C++ and slang
Waqar-ukaea Feb 27, 2026
342d07e
Switch to pre-allocate arrays in BVH construction rather than relying…
Waqar-ukaea Feb 27, 2026
ac4c7f3
Rebased to latest main
Waqar-ukaea Mar 19, 2026
bb61bf6
Addressed comments and created new shared_constants.h header
Waqar-ukaea Apr 10, 2026
ace02e9
Reverted change to constants
Waqar-ukaea Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,14 @@ src/gprt/ray_tracer.cpp
list(APPEND xdg_device_codes
dbl_deviceCode
)

list(APPEND xdg_slang_headers
src/gprt/triangle_rt_shaders.slang
src/gprt/tetrahedron_rt_shaders.slang
include/xdg/gprt/shared_structs.h
include/xdg/gprt/rt_common.slangh
include/xdg/shared_enums.h
include/xdg/geometry/dp_math.h
)
endif()

if (XDG_ENABLE_LIBMESH)
Expand Down Expand Up @@ -267,9 +274,7 @@ if (XDG_ENABLE_GPRT)
OUTPUT_TARGET
${device_code}
HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/include/xdg/gprt/shared_structs.h
${CMAKE_CURRENT_SOURCE_DIR}/include/xdg/shared_enums.h
${CMAKE_CURRENT_SOURCE_DIR}/include/xdg/geometry/dp_math.h
${CMAKE_CURRENT_SOURCE_DIR}/${xdg_slang_headers}
SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/src/gprt/${device_code}.slang
)
Expand Down
18 changes: 9 additions & 9 deletions include/xdg/geometry/dp_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ functions for dot product, cross product, and absolute value. In C++ compilation
while in Slang compilation, it maps to `double3`.
*/

#ifdef __SLANG__
#if defined(__SLANG__) || defined(__SLANG_COMPILER__)

// Slang compilation, map dp::vec3 -> double3
namespace dp {
typedef double3 vec3;

static const double DBL_EPS = 2.2204460492503131e-016;
static const double PLUCKER_ZERO_TOL = 20.0 * DBL_EPS;
static const double INFTY = 1.7976931348623157e+308;

inline double dot(vec3 a, vec3 b) { return ::dot(a, b); }
inline vec3 cross(vec3 a, vec3 b) { return ::cross(a, b); }
inline double abs(double a) { return ::abs(a); }

static const double DBL_ZERO_TOL = 20 * DBL_EPSILON;
static const double INFTY = 1.7976931348623157e+308; // std::numeric_limits<double>::max() is not available in slang
}

#else
Expand All @@ -29,15 +30,14 @@ namespace dp {
namespace dp {
typedef xdg::Vec3da vec3;

static constexpr double PLUCKER_ZERO_TOL = 20.0 * std::numeric_limits<double>::epsilon();
constexpr double INFTY {std::numeric_limits<double>::max()};

inline double dot(vec3 a, vec3 b) { return xdg::dot(a, b); }
inline vec3 cross(vec3 a, vec3 b) { return xdg::cross(a, b); }
inline double abs(double a) { return std::fabs(a); }

static constexpr double DBL_ZERO_TOL = 20.0 * std::numeric_limits<double>::epsilon();
constexpr double INFTY {std::numeric_limits<double>::max()};

}

#endif // end of ifdef __slang__

#endif // DP_MATH_H
#endif // DP_MATH_H
37 changes: 34 additions & 3 deletions include/xdg/geometry/plucker.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct PluckerIntersectionResult {
double t = 0.0; // Distance along the ray to the intersection point
};

constexpr PluckerIntersectionResult EXIT_EARLY = {false, 0.0};
static const PluckerIntersectionResult EXIT_EARLY = {false, 0.0};

/* Function to return the vertex with the lowest coordinates. To force the same
ray-edge computation, the Plücker test needs to use consistent edge
Expand Down Expand Up @@ -56,7 +56,7 @@ inline double plucker_edge_test(dp::vec3 vertexa, dp::vec3 vertexb,
pip = dp::dot(ray, edge_normal) + dp::dot(ray_normal, edge);
pip = -pip;
}
if (dp::DBL_ZERO_TOL > dp::abs(pip)) // <-- absd
if (dp::PLUCKER_ZERO_TOL > dp::abs(pip)) // <-- absd
pip = 0.0;
return pip;
}
Expand Down Expand Up @@ -160,6 +160,37 @@ inline PluckerIntersectionResult plucker_ray_tri_intersect(dp::vec3 vertices[3],
return {true, dist_out};
}

// Plücker containment test for tetrahedra. Returns true if the point is inside the tetrahedron defined by vertices v0, v1, v2, v3.
inline bool plucker_tet_containment_test(const dp::vec3 point,
const dp::vec3 v0,
const dp::vec3 v1,
const dp::vec3 v2,
const dp::vec3 v3) {

// TODO - I've decided to use Cramer's rule here instead of matrix inversion to make it easier to implement a cross-compilable version of this function
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general this looks fine to me. In my branch for quads/hexes I've moved to a Plucker coordinate check against face and I might recommend we change to that broadly as it naturally extends to linear elements with an arbitrary number of faces, but I'm curious, what was incompatible in the previous version?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been a while, but I think the main issue was related to row/column convention differences between the matrix API used by GPRT/Slang and the linalg API used on the C++ side. The old method (still in main) was doing this matrix multiplication to solve for our barycentric coords:

    // Solve T * [λ1, λ2, λ3] = rhs
    double3 lambda123 = mul(inverse(T),{rhs.x, rhs.y, rhs.z});

I initially tried to implement the same logic on the Slang side, but I got different barycentric coordinates on CPU vs GPU. My understanding is that the two math APIs were interpreting the matrix row/column ordering differently which was leading to differences when I would call mul(...).

I switched to Cramer's rule to avoid the matrix inversion and multiplication path entirely. That keeps the shared Slang/C++ implementation to just dot and cross products, which is easier to keep consistent across both C++ and slang.

I might recommend we change to that broadly as it naturally extends to linear elements with an arbitrary number of faces, but I'm curious, what was incompatible in the previous version?

Agreed, seems reasonable to move towards a more arbitrary method.

// Not sure if that is necessarily the best choice however.
const dp::vec3 e0 = v1 - v0;
const dp::vec3 e1 = v2 - v0;
const dp::vec3 e2 = v3 - v0;
const dp::vec3 rhs = point - v0;
const double det = dp::dot(e0, dp::cross(e1, e2)); // scalar triple product of matrix [e0 e1 e2]

const double inv_det = 1.0 / det;
const double lambda1 = dp::dot(rhs, dp::cross(e1, e2)) * inv_det;
const double lambda2 = dp::dot(e0, dp::cross(rhs, e2)) * inv_det;
const double lambda3 = dp::dot(e0, dp::cross(e1, rhs)) * inv_det;
const double lambda0 = 1.0 - (lambda1 + lambda2 + lambda3);

const double barycentric_min = -dp::PLUCKER_ZERO_TOL;
const double barycentric_max = 1.0 + dp::PLUCKER_ZERO_TOL;

Comment on lines +184 to +186
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like these could be compile-time constants, no?

// Check all λ_i in [0, 1]
return (lambda0 >= barycentric_min && lambda0 <= barycentric_max) &&
(lambda1 >= barycentric_min && lambda1 <= barycentric_max) &&
(lambda2 >= barycentric_min && lambda2 <= barycentric_max) &&
(lambda3 >= barycentric_min && lambda3 <= barycentric_max);
}

} // namespace xdg

#endif // include guard
#endif // include guard
29 changes: 14 additions & 15 deletions include/xdg/gprt/ray_tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ enum class RayGenType {
RAY_FIRE,
POINT_IN_VOLUME,
OCCLUDED,
CLOSEST
CLOSEST,
FIND_ELEMENT
};

struct gprtRayHit {
Expand All @@ -49,16 +50,9 @@ class GPRTRayTracer : public RayTracer {
// Setup the different shader programs for use with this ray tracer
void setup_shaders();

MeshID find_element(const Position& point) const override
{
fatal_error("Element trees not currently supported with GPRT ray tracer");
return ID_NONE;
};
MeshID find_element(const Position& point) const override;

MeshID find_element(TreeID tree, const Position& point) const override {
fatal_error("Element trees not currently supported with GPRT ray tracer");
return ID_NONE;
};
MeshID find_element(TreeID tree, const Position& point) const override;

std::pair<TreeID, TreeID>
register_volume(const std::shared_ptr<MeshManager>& mesh_manager,
Expand Down Expand Up @@ -114,25 +108,30 @@ class GPRTRayTracer : public RayTracer {
// Shader programs
std::map<RayGenType, GPRTRayGenOf<dblRayGenData>> rayGenPrograms_;

GPRTMissOf<void> missProgram_;
GPRTComputeOf<DPTriangleGeomData> aabbPopulationProgram_; //<! AABB population program for double precision rays

GPRTMissOf<void> triangleMissProgram_;
GPRTMissOf<void> tetMissProgram_;
GPRTComputeOf<DPTriangleGeomData> aabbTriPopulationProgram_; //<! AABB population program for double precision rays against triangle geometries
GPRTComputeOf<DPTetrahedronGeomData> aabbTetPopulationProgram_; //<! AABB population program for double precision rays against tetrahedron geometries

// Buffers
gprtRayHit rayHitBuffers_;
GPRTBufferOf<int32_t> excludePrimitivesBuffer_; //<! Buffer for excluded primitives

// Geometry Type and Instances
std::vector<gprt::Instance> globalBlasInstances_; //<! List of every BLAS instance stored in this ray tracer
GPRTGeomTypeOf<DPTriangleGeomData> trianglesGeomType_; //<! Geometry type for triangles
GPRTGeomTypeOf<DPTetrahedronGeomData> tetrahedraGeomType_; //<! Geometry type for tetrahedra (not currently supported)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not currently supported?


// Ray Generation parameters
uint32_t numRayTypes_ = 1; // <! Number of ray types. Allows multiple shaders to be set to the same geometery
uint32_t numRayTypes_ = RT_NUM_RAY_TYPES; // <! Number of ray types. 0=surface, 1=volume
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove the values in the comments here lest they change in the code itself and we forget to update the values here.


// Mesh-to-Scene maps
std::map<MeshID, GPRTGeomOf<DPTriangleGeomData>> surface_to_geometry_map_; //<! Map from mesh surface to embree geometry

// Internal GPRT Mappings
std::unordered_map<SurfaceTreeID, GPRTAccel> surface_volume_tree_to_accel_map; // Map from XDG::TreeID to GPRTAccel for volume TLAS
std::unordered_map<ElementTreeID, GPRTAccel> element_volume_tree_to_accel_map; // Map from XDG::TreeID to GPRTAccel for element TLAS

std::vector<GPRTAccel> blas_handles_; // Store BLAS handles so that they can be explicitly referenced in destructor

// Global Tree IDs
Expand All @@ -143,4 +142,4 @@ class GPRTRayTracer : public RayTracer {

} // namespace xdg

#endif // include guard
#endif // include guard
79 changes: 79 additions & 0 deletions include/xdg/gprt/rt_common.slangh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#ifndef XDG_GPRT_RT_COMMON_SLANGH
#define XDG_GPRT_RT_COMMON_SLANGH

#include "shared_structs.h"
#include "../geometry/dp_math.h" // dp math shared between C++ and slang
#include "../geometry/plucker.h" // Plücker ray-edge intersection test

// Shared types for both Triangle and Tetrahedron ray tracing pipelines.
[[vk::push_constant]]
dblRayFirePushConstants PC;

static const int32_t ID_NONE = -1; // slang copy of the C++ constant for invalid IDs
struct DPAttribute
{
double f64t; // double precision hit distance
int global_prim_id;
};

// ------------------------------------------------- Helper functions -------------------------------------------------

// Templated/generic function to compute AABBs in double precision and fill the float AABB buffer. Generic to handle triangles, tets and future primitives with different numbers of vertices
__generic<int N>
inline void populate_aabb(uint primID, double3 *vertex, float3 *aabb, uint indices[N])
{
double3 dpAabbMin = vertex[indices[0]];
double3 dpAabbMax = dpAabbMin;

// unroll the loop since N is known to be small(ish) at compile time to keep performance
[unroll]
for (uint i = 1; i < N; ++i)
{
double3 vert = vertex[indices[i]];
dpAabbMin = min(dpAabbMin, vert);
dpAabbMax = max(dpAabbMax, vert);
}

float3 fpAabbMin = float3(dpAabbMin - double3(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON));
float3 fpAabbMax = float3(dpAabbMax + double3(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON));

aabb[2 * primID] = fpAabbMin;
aabb[2 * primID + 1] = fpAabbMax;
}


bool orientation_cull(in double3 ray, in double3 normal, in xdg::HitOrientation orientation) {
if (orientation == xdg::HitOrientation::ANY) return false; // No culling
double dot_product = dot(ray, normal);
if (orientation == xdg::HitOrientation::EXITING) return dot_product < 0.0; // Cull exiting rays
if (orientation == xdg::HitOrientation::ENTERING) return dot_product > 0.0; // Cull entering rays
return false; // Default case, no culling
}


/* Function to return the vertex with the lowest coordinates. To force the same
ray-edge computation, the Plücker test needs to use consistent edge
representation. This would be more simple with MOAB handles instead of
coordinates... */
inline bool first(in double3 a, in double3 b)
{
if (a[0] < b[0]) return true;

if (a[0] == b[0] && a[1] < b[1]) return true;

if (a[1] == b[1] && a[2] < b[2]) return true;

return false;
}

float next_after(float a) {
uint a_ = asuint(a);
if (a < 0) {
a_--;
} else {
a_++;
}
return asfloat(a_);
}

#endif // XDG_GPRT_RT_COMMON_SLANGH
22 changes: 21 additions & 1 deletion include/xdg/gprt/shared_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
#include "../shared_enums.h"
#include "../geometry/dp_math.h"


namespace xdg {
static const uint RT_SURFACE_RAY_INDEX = 0; // SBT ray index for surface ray queries
static const uint RT_VOLUME_RAY_INDEX = 1; // SBT ray index for volumetric ray queries
static const uint RT_NUM_RAY_TYPES = 2; // total number of ray types (surface and volume)
static const uint RT_SURFACE_MISS_INDEX = 0; // SBT ray index for surface ray miss shader
static const uint RT_VOLUME_MISS_INDEX = 1; // SBT ray index for
}
struct GPRTPrimitiveRef
{
int id; // ID of the primitive
Expand All @@ -18,7 +26,8 @@ struct dblRay
int32_t exclude_count; // Number of excluded primitives
xdg::HitOrientation hitOrientation;
int volume_tree; // TreeID of the volume being queried
SurfaceAccelerationStructure volume_accel; // The volume accel
SurfaceAccelerationStructure volume_accel_surf; // The volume accel for surface acceleration structure
SolidAccelerationStructure volume_accel_solid; // The volume accel for solid acceleration structure
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of solid here feels a little off. What about "ElementAccelerationStrucutre"?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do agree but both SurfaceAccelerationStructure and SolidAccelerationStructure are types defined in GPRT here - https://github.com/gprt-org/GPRT/blob/master/gprt/gprt_builtins.slang

So I would need to put in a PR to make the name change upstream to make that change

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or I suppose I could typedef them here locally in XDG to have that extra consistency with the naming scheme we have.

};

struct dblHit
Expand Down Expand Up @@ -47,6 +56,17 @@ struct DPTriangleGeomData {
int num_faces; // Number of faces in the geometry
};

struct DPTetrahedronGeomData {
double3 *vertex; // vertex buffer
float3 *aabbs; // AABB buffer
uint4 *index; // index buffer
int32_t vol_id;
dblRay *ray; // double precision rays
xdg::HitOrientation hitOrientation;
GPRTPrimitiveRef* primitive_refs;
int num_tets; // Number of tetrahedra in the geometry
};

struct dblRayGenData {
dblRay *ray;
dblHit *hit;
Expand Down
1 change: 1 addition & 0 deletions include/xdg/shared_enums.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
namespace xdg {

enum PointInVolume : int {
UNSET = -1,
OUTSIDE = 0,
INSIDE = 1
};
Expand Down
6 changes: 1 addition & 5 deletions include/xdg/tetrahedron_contain.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


#include "xdg/vec3da.h"
#include "xdg/geometry/plucker.h"

namespace xdg
{
Expand Down Expand Up @@ -30,11 +31,6 @@ namespace xdg
* @return `true` if the point is inside or on the boundary of the tetrahedron,
* `false` otherwise.
*/
bool plucker_tet_containment_test(const Position& point,
const Vertex& v0,
const Vertex& v1,
const Vertex& v2,
const Vertex& v3);

// Embree call back functions for element search
void VolumeElementBoundsFunc(RTCBoundsFunctionArguments* args);
Expand Down
Loading
Loading