-
Notifications
You must be signed in to change notification settings - Fork 181
Description
Summary
IGC produces incorrect code when compiling a SPIR-V module where:
- A kernel entry point calls an inner function via
OpFunctionCall - The inner function takes
byval align 64struct parameters (8×double, 64 bytes) - The inner function does not have
Exportlinkage (so IGC inlines it) - The struct fields are used in complex arithmetic with other byval vector arguments
After inlining, loads from certain struct fields return 0 instead of their correct values. Specifically, alternating fields in the upper half of the struct are corrupted (e.g., dirX.s5, dirX.s7, dirY.s4, dirY.s6).
Adding OpDecorate %inner_function LinkageAttributes "..." Export prevents inlining and fixes the issue, confirming the bug is in IGC's inlining/argument-copying pass.
Environment
- GPU: Intel Arc A770 (DG2, PCI
0x56a0) - Driver:
intel-opencl-icd 26.05.37020.3-0 - IGC:
2.28.4 - Kernel:
6.11.0-29-generic - OS: Ubuntu 24.04
Reproducer
The reproducer consists of:
- A SPIR-V assembly file (
repro.spvasm) — assemble withspirv-as - A host C++ program (
repro.cpp) — pure OpenCL, loads SPIR-V viaclCreateProgramWithIL
Build & Run
# Assemble the SPIR-V (BAD version, no Export linkage → bug triggers)
spirv-as repro.spvasm -o repro_bad.spv
# Create the GOOD version (add Export linkage to prevent inlining)
# Add this line to repro.spvasm after the __chipspv_device_heap decorations:
# OpDecorate %22 LinkageAttributes "inner_impl" Export
spirv-as repro_good.spvasm -o repro_good.spv
# Build host program
g++ -O2 repro.cpp -o repro -lOpenCL
# Test
./repro repro_bad.spv # FAIL — struct fields read as 0
./repro repro_good.spv # PASS — Export linkage prevents inliningExpected vs Actual Output
# BAD (no Export → IGC inlines → wrong values):
dirX.s5 = +0.0 (expected -1.0) MISMATCH
dirX.s7 = +0.0 (expected +1.0) MISMATCH
dirY.s4 = +0.0 (expected +1.0) MISMATCH
dirY.s6 = +0.0 (expected -1.0) MISMATCH
FAIL
# GOOD (Export linkage → no inlining → correct):
All fields match expected values
PASS
Host Program (repro.cpp)
// IGC bug reproducer: byval align 64 struct fields corrupted during inlining
// Build: g++ -O2 repro.cpp -o repro -lOpenCL
// Run: ./repro repro_bad.spv (FAIL)
// ./repro repro_good.spv (PASS)
#define CL_TARGET_OPENCL_VERSION 210
#include <CL/cl.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#define CHK(x) do { cl_int e=(x); if(e) { \
fprintf(stderr, "FAIL: %s returned %d at line %d\n", #x, e, __LINE__); \
exit(1); } } while(0)
typedef struct __attribute__((aligned(64))) {
double s0, s1, s2, s3, s4, s5, s6, s7;
} double8_aligned;
typedef struct __attribute__((aligned(32))) {
double x, y, z, w;
} double4_aligned;
static unsigned char* load_spirv(const char* path, size_t* size) {
FILE* f = fopen(path, "rb");
if (!f) { fprintf(stderr, "Cannot open %s\n", path); exit(1); }
fseek(f, 0, SEEK_END);
*size = ftell(f);
fseek(f, 0, SEEK_SET);
unsigned char* buf = (unsigned char*)malloc(*size);
if (fread(buf, 1, *size, f) != *size) {
fprintf(stderr, "Read error\n"); exit(1);
}
fclose(f);
return buf;
}
int main(int argc, char** argv) {
const char* spv_path = argc > 1 ? argv[1] : "repro_bad.spv";
cl_platform_id plat;
cl_device_id dev;
cl_int err;
CHK(clGetPlatformIDs(1, &plat, nullptr));
CHK(clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, nullptr));
char name[256];
clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(name), name, nullptr);
printf("Device: %s\n", name);
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
CHK(err);
cl_command_queue queue = clCreateCommandQueueWithProperties(ctx, dev, nullptr, &err);
CHK(err);
size_t spv_size;
unsigned char* spv_data = load_spirv(spv_path, &spv_size);
printf("Loaded %s (%zu bytes)\n", spv_path, spv_size);
cl_program prog = clCreateProgramWithIL(ctx, spv_data, spv_size, &err);
CHK(err);
err = clBuildProgram(prog, 1, &dev, nullptr, nullptr, nullptr);
if (err != CL_SUCCESS) {
char log[8192];
clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, sizeof(log), log, nullptr);
fprintf(stderr, "Build failed (err=%d):\n%s\n", err, log);
exit(1);
}
// Entry point name from the SPIR-V
cl_kernel kern = clCreateKernel(prog,
"_Z9lbmKernelPd7double8S0_PKddd15HIP_vector_typeIdLj4EES4_", &err);
CHK(err);
cl_mem d_out = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, 20 * sizeof(double), nullptr, &err);
CHK(err);
// Struct fields: s4-s7 intentionally DIFFERENT from s0-s3
double8_aligned dirX, dirY;
dirX.s0 = 1.0; dirX.s1 = 0.0; dirX.s2 = -1.0; dirX.s3 = 0.0;
dirX.s4 = 1.0; dirX.s5 = -1.0; dirX.s6 = -1.0; dirX.s7 = 1.0;
dirY.s0 = 0.0; dirY.s1 = 1.0; dirY.s2 = 0.0; dirY.s3 = -1.0;
dirY.s4 = 1.0; dirY.s5 = 1.0; dirY.s6 = -1.0; dirY.s7 = -1.0;
double w[9] = {4.0/9, 1.0/9, 1.0/9, 1.0/9, 1.0/9,
1.0/36, 1.0/36, 1.0/36, 1.0/36};
cl_mem d_weight = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
9 * sizeof(double), w, &err);
CHK(err);
double omega = 1.2, f0 = 5.0;
double4_aligned f1234 = {1.0, 1.0, 1.0, 1.0};
double4_aligned f5678 = {0.5, 0.5, 0.5, 0.5};
int arg = 0;
CHK(clSetKernelArg(kern, arg++, sizeof(d_out), &d_out));
CHK(clSetKernelArg(kern, arg++, sizeof(double8_aligned), &dirX));
CHK(clSetKernelArg(kern, arg++, sizeof(double8_aligned), &dirY));
CHK(clSetKernelArg(kern, arg++, sizeof(d_weight), &d_weight));
CHK(clSetKernelArg(kern, arg++, sizeof(double), &omega));
CHK(clSetKernelArg(kern, arg++, sizeof(double), &f0));
CHK(clSetKernelArg(kern, arg++, sizeof(double4_aligned), &f1234));
CHK(clSetKernelArg(kern, arg++, sizeof(double4_aligned), &f5678));
size_t global = 1, local = 1;
CHK(clEnqueueNDRangeKernel(queue, kern, 1, nullptr, &global, &local, 0, nullptr, nullptr));
double h_out[20];
CHK(clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, 20 * sizeof(double), h_out, 0, nullptr, nullptr));
// out[3..10] = dirX.s0..s7, out[11..18] = dirY.s0..s7
double expX[] = {1.0, 0.0, -1.0, 0.0, 1.0, -1.0, -1.0, 1.0};
double expY[] = {0.0, 1.0, 0.0, -1.0, 1.0, 1.0, -1.0, -1.0};
bool pass = true;
for (int i = 0; i < 8; i++) {
if (h_out[3 + i] != expX[i]) {
printf("dirX.s%d = %+.1f (expected %+.1f) MISMATCH\n", i, h_out[3+i], expX[i]);
pass = false;
}
if (h_out[11 + i] != expY[i]) {
printf("dirY.s%d = %+.1f (expected %+.1f) MISMATCH\n", i, h_out[11+i], expY[i]);
pass = false;
}
}
printf("%s\n", pass ? "PASS" : "FAIL");
clReleaseMemObject(d_out);
clReleaseMemObject(d_weight);
clReleaseKernel(kern);
clReleaseProgram(prog);
clReleaseCommandQueue(queue);
clReleaseContext(ctx);
free(spv_data);
return pass ? 0 : 1;
}SPIR-V Assembly (repro.spvasm)
The SPIR-V module has a kernel entry point that calls an inner function. The inner function takes two byval align 64 structs (each containing 8 doubles), plus additional byval double4 vector arguments. It loads all struct fields and uses them in arithmetic, then stores them to an output buffer.
Full SPIR-V assembly (click to expand, 440 lines)
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 255
; Schema: 0
OpCapability Addresses
OpCapability Linkage
OpCapability Kernel
OpCapability Float64
OpCapability Int64
OpCapability GenericPointer
OpCapability Int8
%1 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %241 "_Z9lbmKernelPd7double8S0_PKddd15HIP_vector_typeIdLj4EES4_"
OpEntryPoint Kernel %252 "__chip_reset_non_symbols"
OpSource OpenCL_C 200000
OpName %__hip_cuid_5719198a7d94f6f8 "__hip_cuid_5719198a7d94f6f8"
OpName %__chipspv_device_heap "__chipspv_device_heap"
OpName %struct_double8 "struct.double8"
OpName %struct_HIP_vector_type "struct.HIP_vector_type"
OpName %struct_HIP_vector_base "struct.HIP_vector_base"
OpName %union_anon_0 "union.anon.0"
OpName %out_coerce "out.coerce"
OpName %dirX "dirX"
OpName %dirY "dirY"
OpName %weight_coerce "weight.coerce"
OpName %omega "omega"
OpName %f0 "f0"
OpName %f1234 "f1234"
OpName %f5678 "f5678"
OpName %entry "entry"
OpName %add "add"
OpName %y "y"
OpName %y4 "y4"
OpName %add5 "add5"
OpName %z "z"
OpName %z6 "z6"
OpName %add7 "add7"
OpName %w "w"
OpName %w8 "w8"
OpName %add9 "add9"
OpName %add11 "add11"
OpName %add13 "add13"
OpName %add15 "add15"
OpName %add17 "add17"
OpName %s1 "s1"
OpName %s2 "s2"
OpName %s3 "s3"
OpName %s4 "s4"
OpName %s5 "s5"
OpName %s6 "s6"
OpName %s7 "s7"
OpName %s119 "s119"
OpName %s220 "s220"
OpName %s321 "s321"
OpName %s422 "s422"
OpName %s523 "s523"
OpName %s624 "s624"
OpName %s725 "s725"
OpName %mul "mul"
OpName %mul30 "mul30"
OpName %add31 "add31"
OpName %mul34 "mul34"
OpName %mul37 "mul37"
OpName %add38 "add38"
OpName %add39 "add39"
OpName %mul42 "mul42"
OpName %mul45 "mul45"
OpName %add46 "add46"
OpName %add47 "add47"
OpName %mul50 "mul50"
OpName %mul53 "mul53"
OpName %add54 "add54"
OpName %add55 "add55"
OpName %div "div"
OpName %mul58 "mul58"
OpName %mul61 "mul61"
OpName %add62 "add62"
OpName %mul65 "mul65"
OpName %mul68 "mul68"
OpName %add69 "add69"
OpName %add70 "add70"
OpName %mul73 "mul73"
OpName %mul76 "mul76"
OpName %add77 "add77"
OpName %add78 "add78"
OpName %mul81 "mul81"
OpName %mul84 "mul84"
OpName %add85 "add85"
OpName %add86 "add86"
OpName %div87 "div87"
OpName %arrayidx88 "arrayidx88"
OpName %arrayidx89 "arrayidx89"
OpName %arrayidx91 "arrayidx91"
OpName %arrayidx93 "arrayidx93"
OpName %arrayidx95 "arrayidx95"
OpName %arrayidx97 "arrayidx97"
OpName %arrayidx99 "arrayidx99"
OpName %arrayidx101 "arrayidx101"
OpName %arrayidx103 "arrayidx103"
OpName %arrayidx105 "arrayidx105"
OpName %arrayidx107 "arrayidx107"
OpName %arrayidx109 "arrayidx109"
OpName %arrayidx111 "arrayidx111"
OpName %arrayidx113 "arrayidx113"
OpName %arrayidx115 "arrayidx115"
OpName %arrayidx117 "arrayidx117"
OpName %arrayidx119 "arrayidx119"
OpName %arrayidx121 "arrayidx121"
OpName %entry_0 "entry"
OpName %out_coerce_0 "out.coerce"
OpName %dirX_0 "dirX"
OpName %dirY_0 "dirY"
OpName %weight_coerce_0 "weight.coerce"
OpName %omega_0 "omega"
OpName %f0_0 "f0"
OpName %f1234_0 "f1234"
OpName %f5678_0 "f5678"
OpDecorate %__hip_cuid_5719198a7d94f6f8 LinkageAttributes "__hip_cuid_5719198a7d94f6f8" Export
OpDecorate %__chipspv_device_heap LinkageAttributes "__chipspv_device_heap" Export
OpDecorate %__chipspv_device_heap Alignment 8
OpDecorate %dirX FuncParamAttr ByVal
OpDecorate %dirX FuncParamAttr NoCapture
OpDecorate %dirX FuncParamAttr NoWrite
OpDecorate %dirX Alignment 64
OpDecorate %dirY FuncParamAttr ByVal
OpDecorate %dirY FuncParamAttr NoCapture
OpDecorate %dirY FuncParamAttr NoWrite
OpDecorate %dirY Alignment 64
OpDecorate %weight_coerce FuncParamAttr NoAlias
OpDecorate %weight_coerce FuncParamAttr NoCapture
OpDecorate %weight_coerce FuncParamAttr NoReadWrite
OpDecorate %f1234 FuncParamAttr ByVal
OpDecorate %f1234 FuncParamAttr NoCapture
OpDecorate %f1234 FuncParamAttr NoWrite
OpDecorate %f1234 Alignment 32
OpDecorate %f5678 FuncParamAttr ByVal
OpDecorate %f5678 FuncParamAttr NoCapture
OpDecorate %f5678 FuncParamAttr NoWrite
OpDecorate %f5678 Alignment 32
OpDecorate %dirX_0 FuncParamAttr ByVal
OpDecorate %dirX_0 FuncParamAttr NoCapture
OpDecorate %dirX_0 FuncParamAttr NoWrite
OpDecorate %dirX_0 Alignment 64
OpDecorate %dirY_0 FuncParamAttr ByVal
OpDecorate %dirY_0 FuncParamAttr NoCapture
OpDecorate %dirY_0 FuncParamAttr NoWrite
OpDecorate %dirY_0 Alignment 64
OpDecorate %weight_coerce_0 FuncParamAttr NoAlias
OpDecorate %weight_coerce_0 FuncParamAttr NoCapture
OpDecorate %weight_coerce_0 FuncParamAttr NoReadWrite
OpDecorate %f1234_0 FuncParamAttr ByVal
OpDecorate %f1234_0 FuncParamAttr NoCapture
OpDecorate %f1234_0 FuncParamAttr NoWrite
OpDecorate %f1234_0 Alignment 32
OpDecorate %f5678_0 FuncParamAttr ByVal
OpDecorate %f5678_0 FuncParamAttr NoCapture
OpDecorate %f5678_0 FuncParamAttr NoWrite
OpDecorate %f5678_0 Alignment 32
%uchar = OpTypeInt 8 0
%ulong = OpTypeInt 64 0
%uchar_0 = OpConstant %uchar 0
%ulong_8 = OpConstant %ulong 8
%ulong_16 = OpConstant %ulong 16
%ulong_24 = OpConstant %ulong 24
%ulong_32 = OpConstant %ulong 32
%ulong_40 = OpConstant %ulong 40
%ulong_48 = OpConstant %ulong 48
%ulong_56 = OpConstant %ulong 56
%ulong_64 = OpConstant %ulong 64
%ulong_72 = OpConstant %ulong 72
%ulong_80 = OpConstant %ulong 80
%ulong_88 = OpConstant %ulong 88
%ulong_96 = OpConstant %ulong 96
%ulong_104 = OpConstant %ulong 104
%ulong_112 = OpConstant %ulong 112
%ulong_120 = OpConstant %ulong 120
%ulong_128 = OpConstant %ulong 128
%ulong_136 = OpConstant %ulong 136
%ulong_144 = OpConstant %ulong 144
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%_ptr_CrossWorkgroup__ptr_Generic_uchar = OpTypePointer CrossWorkgroup %_ptr_Generic_uchar
%void = OpTypeVoid
%double = OpTypeFloat 64
%struct_double8 = OpTypeStruct %double %double %double %double %double %double %double %double
%_ptr_Function_struct_double8 = OpTypePointer Function %struct_double8
%v4double = OpTypeVector %double 4
%union_anon_0 = OpTypeStruct %v4double
%struct_HIP_vector_base = OpTypeStruct %union_anon_0
%struct_HIP_vector_type = OpTypeStruct %struct_HIP_vector_base
%_ptr_Function_struct_HIP_vector_type = OpTypePointer Function %struct_HIP_vector_type
%21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_Function_struct_double8 %_ptr_Function_struct_double8 %_ptr_CrossWorkgroup_uchar %double %double %_ptr_Function_struct_HIP_vector_type %_ptr_Function_struct_HIP_vector_type
%_ptr_Generic_double = OpTypePointer Generic %double
%_ptr_Function_double = OpTypePointer Function %double
%_ptr_Function_uchar = OpTypePointer Function %uchar
%238 = OpTypeFunction %void
%__hip_cuid_5719198a7d94f6f8 = OpVariable %_ptr_CrossWorkgroup_uchar CrossWorkgroup %uchar_0
%8 = OpConstantNull %_ptr_Generic_uchar
%__chipspv_device_heap = OpVariable %_ptr_CrossWorkgroup__ptr_Generic_uchar CrossWorkgroup %8
; ---- Inner function (no Export linkage → IGC will inline this) ----
%22 = OpFunction %void None %21
%out_coerce = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%dirX = OpFunctionParameter %_ptr_Function_struct_double8
%dirY = OpFunctionParameter %_ptr_Function_struct_double8
%weight_coerce = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%omega = OpFunctionParameter %double
%f0 = OpFunctionParameter %double
%f1234 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
%f5678 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
%entry = OpLabel
; [... kernel body: loads struct fields, does arithmetic, stores results ...]
; (Full body in attached SPIR-V assembly)
%33 = OpConvertPtrToU %ulong %out_coerce
%35 = OpConvertUToPtr %_ptr_Generic_double %33
%37 = OpBitcast %_ptr_Function_double %f1234
%38 = OpLoad %double %37 Aligned 32
%39 = OpBitcast %_ptr_Function_double %f5678
%40 = OpLoad %double %39 Aligned 32
%add = OpFAdd %double %38 %40
%43 = OpBitcast %_ptr_Function_uchar %f1234
%y = OpInBoundsPtrAccessChain %_ptr_Function_uchar %43 %ulong_8
%46 = OpBitcast %_ptr_Function_double %y
%47 = OpLoad %double %46 Aligned 8
%48 = OpBitcast %_ptr_Function_uchar %f5678
%y4 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %48 %ulong_8
%50 = OpBitcast %_ptr_Function_double %y4
%51 = OpLoad %double %50 Aligned 8
%add5 = OpFAdd %double %47 %51
%53 = OpBitcast %_ptr_Function_uchar %f1234
%z = OpInBoundsPtrAccessChain %_ptr_Function_uchar %53 %ulong_16
%56 = OpBitcast %_ptr_Function_double %z
%57 = OpLoad %double %56 Aligned 16
%58 = OpBitcast %_ptr_Function_uchar %f5678
%z6 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %58 %ulong_16
%60 = OpBitcast %_ptr_Function_double %z6
%61 = OpLoad %double %60 Aligned 16
%add7 = OpFAdd %double %57 %61
%63 = OpBitcast %_ptr_Function_uchar %f1234
%w = OpInBoundsPtrAccessChain %_ptr_Function_uchar %63 %ulong_24
%66 = OpBitcast %_ptr_Function_double %w
%67 = OpLoad %double %66 Aligned 8
%68 = OpBitcast %_ptr_Function_uchar %f5678
%w8 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %68 %ulong_24
%70 = OpBitcast %_ptr_Function_double %w8
%71 = OpLoad %double %70 Aligned 8
%add9 = OpFAdd %double %67 %71
%add11 = OpFAdd %double %f0 %add
%add13 = OpFAdd %double %add11 %add5
%add15 = OpFAdd %double %add13 %add7
%add17 = OpFAdd %double %add15 %add9
%77 = OpBitcast %_ptr_Function_double %dirX
%78 = OpLoad %double %77 Aligned 64
%79 = OpBitcast %_ptr_Function_uchar %dirX
%s1 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %79 %ulong_8
%81 = OpBitcast %_ptr_Function_double %s1
%82 = OpLoad %double %81 Aligned 8
%83 = OpBitcast %_ptr_Function_uchar %dirX
%s2 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %83 %ulong_16
%85 = OpBitcast %_ptr_Function_double %s2
%86 = OpLoad %double %85 Aligned 16
%87 = OpBitcast %_ptr_Function_uchar %dirX
%s3 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %87 %ulong_24
%89 = OpBitcast %_ptr_Function_double %s3
%90 = OpLoad %double %89 Aligned 8
%91 = OpBitcast %_ptr_Function_uchar %dirX
%s4 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %91 %ulong_32
%94 = OpBitcast %_ptr_Function_double %s4
%95 = OpLoad %double %94 Aligned 32
%96 = OpBitcast %_ptr_Function_uchar %dirX
%s5 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %96 %ulong_40
%99 = OpBitcast %_ptr_Function_double %s5
%100 = OpLoad %double %99 Aligned 8
%101 = OpBitcast %_ptr_Function_uchar %dirX
%s6 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %101 %ulong_48
%104 = OpBitcast %_ptr_Function_double %s6
%105 = OpLoad %double %104 Aligned 16
%106 = OpBitcast %_ptr_Function_uchar %dirX
%s7 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %106 %ulong_56
%109 = OpBitcast %_ptr_Function_double %s7
%110 = OpLoad %double %109 Aligned 8
%111 = OpBitcast %_ptr_Function_double %dirY
%112 = OpLoad %double %111 Aligned 64
%113 = OpBitcast %_ptr_Function_uchar %dirY
%s119 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %113 %ulong_8
%115 = OpBitcast %_ptr_Function_double %s119
%116 = OpLoad %double %115 Aligned 8
%117 = OpBitcast %_ptr_Function_uchar %dirY
%s220 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %117 %ulong_16
%119 = OpBitcast %_ptr_Function_double %s220
%120 = OpLoad %double %119 Aligned 16
%121 = OpBitcast %_ptr_Function_uchar %dirY
%s321 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %121 %ulong_24
%123 = OpBitcast %_ptr_Function_double %s321
%124 = OpLoad %double %123 Aligned 8
%125 = OpBitcast %_ptr_Function_uchar %dirY
%s422 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %125 %ulong_32
%127 = OpBitcast %_ptr_Function_double %s422
%128 = OpLoad %double %127 Aligned 32
%129 = OpBitcast %_ptr_Function_uchar %dirY
%s523 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %129 %ulong_40
%131 = OpBitcast %_ptr_Function_double %s523
%132 = OpLoad %double %131 Aligned 8
%133 = OpBitcast %_ptr_Function_uchar %dirY
%s624 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %133 %ulong_48
%135 = OpBitcast %_ptr_Function_double %s624
%136 = OpLoad %double %135 Aligned 16
%137 = OpBitcast %_ptr_Function_uchar %dirY
%s725 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %137 %ulong_56
%139 = OpBitcast %_ptr_Function_double %s725
%140 = OpLoad %double %139 Aligned 8
%mul = OpFMul %double %38 %78
%mul30 = OpFMul %double %47 %82
%add31 = OpFAdd %double %mul %mul30
%mul34 = OpFMul %double %57 %86
%mul37 = OpFMul %double %67 %90
%add38 = OpFAdd %double %mul34 %mul37
%add39 = OpFAdd %double %add31 %add38
%mul42 = OpFMul %double %40 %95
%mul45 = OpFMul %double %51 %100
%add46 = OpFAdd %double %mul42 %mul45
%add47 = OpFAdd %double %add39 %add46
%mul50 = OpFMul %double %61 %105
%mul53 = OpFMul %double %71 %110
%add54 = OpFAdd %double %mul50 %mul53
%add55 = OpFAdd %double %add47 %add54
%div = OpFDiv %double %add55 %add17
%mul58 = OpFMul %double %38 %112
%mul61 = OpFMul %double %47 %116
%add62 = OpFAdd %double %mul58 %mul61
%mul65 = OpFMul %double %57 %120
%mul68 = OpFMul %double %67 %124
%add69 = OpFAdd %double %mul65 %mul68
%add70 = OpFAdd %double %add62 %add69
%mul73 = OpFMul %double %40 %128
%mul76 = OpFMul %double %51 %132
%add77 = OpFAdd %double %mul73 %mul76
%add78 = OpFAdd %double %add70 %add77
%mul81 = OpFMul %double %61 %136
%mul84 = OpFMul %double %71 %140
%add85 = OpFAdd %double %mul81 %mul84
%add86 = OpFAdd %double %add78 %add85
%div87 = OpFDiv %double %add86 %add17
OpStore %35 %add17 Aligned 8
%173 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx88 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %173 %ulong_8
%175 = OpBitcast %_ptr_Generic_double %arrayidx88
OpStore %175 %div Aligned 8
%176 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx89 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %176 %ulong_16
%178 = OpBitcast %_ptr_Generic_double %arrayidx89
OpStore %178 %div87 Aligned 8
%179 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx91 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %179 %ulong_24
%181 = OpBitcast %_ptr_Generic_double %arrayidx91
OpStore %181 %78 Aligned 8
%182 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx93 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %182 %ulong_32
%184 = OpBitcast %_ptr_Generic_double %arrayidx93
OpStore %184 %82 Aligned 8
%185 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx95 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %185 %ulong_40
%187 = OpBitcast %_ptr_Generic_double %arrayidx95
OpStore %187 %86 Aligned 8
%188 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx97 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %188 %ulong_48
%190 = OpBitcast %_ptr_Generic_double %arrayidx97
OpStore %190 %90 Aligned 8
%191 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx99 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %191 %ulong_56
%193 = OpBitcast %_ptr_Generic_double %arrayidx99
OpStore %193 %95 Aligned 8
%194 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx101 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %194 %ulong_64
%197 = OpBitcast %_ptr_Generic_double %arrayidx101
OpStore %197 %100 Aligned 8
%198 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx103 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %198 %ulong_72
%201 = OpBitcast %_ptr_Generic_double %arrayidx103
OpStore %201 %105 Aligned 8
%202 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx105 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %202 %ulong_80
%205 = OpBitcast %_ptr_Generic_double %arrayidx105
OpStore %205 %110 Aligned 8
%206 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx107 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %206 %ulong_88
%209 = OpBitcast %_ptr_Generic_double %arrayidx107
OpStore %209 %112 Aligned 8
%210 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx109 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %210 %ulong_96
%213 = OpBitcast %_ptr_Generic_double %arrayidx109
OpStore %213 %116 Aligned 8
%214 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx111 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %214 %ulong_104
%217 = OpBitcast %_ptr_Generic_double %arrayidx111
OpStore %217 %120 Aligned 8
%218 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx113 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %218 %ulong_112
%221 = OpBitcast %_ptr_Generic_double %arrayidx113
OpStore %221 %124 Aligned 8
%222 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx115 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %222 %ulong_120
%225 = OpBitcast %_ptr_Generic_double %arrayidx115
OpStore %225 %128 Aligned 8
%226 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx117 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %226 %ulong_128
%229 = OpBitcast %_ptr_Generic_double %arrayidx117
OpStore %229 %132 Aligned 8
%230 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx119 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %230 %ulong_136
%233 = OpBitcast %_ptr_Generic_double %arrayidx119
OpStore %233 %136 Aligned 8
%234 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx121 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %234 %ulong_144
%237 = OpBitcast %_ptr_Generic_double %arrayidx121
OpStore %237 %140 Aligned 8
OpReturn
OpFunctionEnd
; ---- Reset function ----
%239 = OpFunction %void None %238
%entry_0 = OpLabel
OpStore %__hip_cuid_5719198a7d94f6f8 %uchar_0 Aligned 1
OpReturn
OpFunctionEnd
; ---- Entry point: calls inner function ----
%241 = OpFunction %void None %21
%out_coerce_0 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%dirX_0 = OpFunctionParameter %_ptr_Function_struct_double8
%dirY_0 = OpFunctionParameter %_ptr_Function_struct_double8
%weight_coerce_0 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%omega_0 = OpFunctionParameter %double
%f0_0 = OpFunctionParameter %double
%f1234_0 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
%f5678_0 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
%250 = OpLabel
%251 = OpFunctionCall %void %22 %out_coerce_0 %dirX_0 %dirY_0 %weight_coerce_0 %omega_0 %f0_0 %f1234_0 %f5678_0
OpReturn
OpFunctionEnd
; ---- Entry point for reset ----
%252 = OpFunction %void None %238
%253 = OpLabel
%254 = OpFunctionCall %void %239
OpReturn
OpFunctionEnd
Workaround
Adding Export linkage to the inner function prevents IGC from inlining it:
+ OpDecorate %22 LinkageAttributes "inner_impl" ExportAfter this one-line change, the SPIR-V assembles, validates, and produces correct results.
Analysis
The SPIR-V module has a kernel entry point (%241) that forwards all arguments to an inner function (%22) via OpFunctionCall. When the inner function lacks Export linkage, IGC inlines it into the entry point. During this inlining, the byval align 64 struct argument accesses (via OpInBoundsPtrAccessChain on the Function-space pointer) are incorrectly compiled — alternating fields in the upper half of the struct (offsets 40, 56 for one struct; 32, 48 for the other) read as 0.
The SPIR-V passes spirv-val validation.
Impact
This blocks chipStar (HIP implementation for SPIR-V GPUs) from correctly running kernels with __align__(64) struct parameters, including the HeCBench fluidSim LBM benchmark.
Related: chipStar PR #1185, chipStar issue #1180