Skip to content

IGC miscompiles byval align 64 struct fields when inlining SPIR-V function #392

@pvelesko

Description

@pvelesko

Summary

IGC produces incorrect code when compiling a SPIR-V module where:

  1. A kernel entry point calls an inner function via OpFunctionCall
  2. The inner function takes byval align 64 struct parameters (8×double, 64 bytes)
  3. The inner function does not have Export linkage (so IGC inlines it)
  4. The struct fields are used in complex arithmetic with other byval vector arguments

After inlining, loads from certain struct fields return 0 instead of their correct values. Specifically, alternating fields in the upper half of the struct are corrupted (e.g., dirX.s5, dirX.s7, dirY.s4, dirY.s6).

Adding OpDecorate %inner_function LinkageAttributes "..." Export prevents inlining and fixes the issue, confirming the bug is in IGC's inlining/argument-copying pass.

Environment

  • GPU: Intel Arc A770 (DG2, PCI 0x56a0)
  • Driver: intel-opencl-icd 26.05.37020.3-0
  • IGC: 2.28.4
  • Kernel: 6.11.0-29-generic
  • OS: Ubuntu 24.04

Reproducer

The reproducer consists of:

  1. A SPIR-V assembly file (repro.spvasm) — assemble with spirv-as
  2. A host C++ program (repro.cpp) — pure OpenCL, loads SPIR-V via clCreateProgramWithIL

Build & Run

# Assemble the SPIR-V (BAD version, no Export linkage → bug triggers)
spirv-as repro.spvasm -o repro_bad.spv

# Create the GOOD version (add Export linkage to prevent inlining)
# Add this line to repro.spvasm after the __chipspv_device_heap decorations:
#   OpDecorate %22 LinkageAttributes "inner_impl" Export
spirv-as repro_good.spvasm -o repro_good.spv

# Build host program
g++ -O2 repro.cpp -o repro -lOpenCL

# Test
./repro repro_bad.spv    # FAIL — struct fields read as 0
./repro repro_good.spv   # PASS — Export linkage prevents inlining

Expected vs Actual Output

# BAD (no Export → IGC inlines → wrong values):
dirX.s5 = +0.0  (expected -1.0) MISMATCH
dirX.s7 = +0.0  (expected +1.0) MISMATCH
dirY.s4 = +0.0  (expected +1.0) MISMATCH
dirY.s6 = +0.0  (expected -1.0) MISMATCH
FAIL

# GOOD (Export linkage → no inlining → correct):
All fields match expected values
PASS

Host Program (repro.cpp)

// IGC bug reproducer: byval align 64 struct fields corrupted during inlining
// Build: g++ -O2 repro.cpp -o repro -lOpenCL
// Run:   ./repro repro_bad.spv   (FAIL)
//        ./repro repro_good.spv  (PASS)

#define CL_TARGET_OPENCL_VERSION 210
#include <CL/cl.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#define CHK(x) do { cl_int e=(x); if(e) { \
  fprintf(stderr, "FAIL: %s returned %d at line %d\n", #x, e, __LINE__); \
  exit(1); } } while(0)

typedef struct __attribute__((aligned(64))) {
  double s0, s1, s2, s3, s4, s5, s6, s7;
} double8_aligned;

typedef struct __attribute__((aligned(32))) {
  double x, y, z, w;
} double4_aligned;

static unsigned char* load_spirv(const char* path, size_t* size) {
  FILE* f = fopen(path, "rb");
  if (!f) { fprintf(stderr, "Cannot open %s\n", path); exit(1); }
  fseek(f, 0, SEEK_END);
  *size = ftell(f);
  fseek(f, 0, SEEK_SET);
  unsigned char* buf = (unsigned char*)malloc(*size);
  if (fread(buf, 1, *size, f) != *size) {
    fprintf(stderr, "Read error\n"); exit(1);
  }
  fclose(f);
  return buf;
}

int main(int argc, char** argv) {
  const char* spv_path = argc > 1 ? argv[1] : "repro_bad.spv";

  cl_platform_id plat;
  cl_device_id dev;
  cl_int err;
  CHK(clGetPlatformIDs(1, &plat, nullptr));
  CHK(clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, nullptr));

  char name[256];
  clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(name), name, nullptr);
  printf("Device: %s\n", name);

  cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
  CHK(err);
  cl_command_queue queue = clCreateCommandQueueWithProperties(ctx, dev, nullptr, &err);
  CHK(err);

  size_t spv_size;
  unsigned char* spv_data = load_spirv(spv_path, &spv_size);
  printf("Loaded %s (%zu bytes)\n", spv_path, spv_size);

  cl_program prog = clCreateProgramWithIL(ctx, spv_data, spv_size, &err);
  CHK(err);
  err = clBuildProgram(prog, 1, &dev, nullptr, nullptr, nullptr);
  if (err != CL_SUCCESS) {
    char log[8192];
    clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, sizeof(log), log, nullptr);
    fprintf(stderr, "Build failed (err=%d):\n%s\n", err, log);
    exit(1);
  }

  // Entry point name from the SPIR-V
  cl_kernel kern = clCreateKernel(prog,
    "_Z9lbmKernelPd7double8S0_PKddd15HIP_vector_typeIdLj4EES4_", &err);
  CHK(err);

  cl_mem d_out = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, 20 * sizeof(double), nullptr, &err);
  CHK(err);

  // Struct fields: s4-s7 intentionally DIFFERENT from s0-s3
  double8_aligned dirX, dirY;
  dirX.s0 = 1.0;  dirX.s1 = 0.0;  dirX.s2 = -1.0; dirX.s3 = 0.0;
  dirX.s4 = 1.0;  dirX.s5 = -1.0; dirX.s6 = -1.0; dirX.s7 = 1.0;
  dirY.s0 = 0.0;  dirY.s1 = 1.0;  dirY.s2 = 0.0;  dirY.s3 = -1.0;
  dirY.s4 = 1.0;  dirY.s5 = 1.0;  dirY.s6 = -1.0; dirY.s7 = -1.0;

  double w[9] = {4.0/9, 1.0/9, 1.0/9, 1.0/9, 1.0/9,
                 1.0/36, 1.0/36, 1.0/36, 1.0/36};
  cl_mem d_weight = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                    9 * sizeof(double), w, &err);
  CHK(err);

  double omega = 1.2, f0 = 5.0;
  double4_aligned f1234 = {1.0, 1.0, 1.0, 1.0};
  double4_aligned f5678 = {0.5, 0.5, 0.5, 0.5};

  int arg = 0;
  CHK(clSetKernelArg(kern, arg++, sizeof(d_out), &d_out));
  CHK(clSetKernelArg(kern, arg++, sizeof(double8_aligned), &dirX));
  CHK(clSetKernelArg(kern, arg++, sizeof(double8_aligned), &dirY));
  CHK(clSetKernelArg(kern, arg++, sizeof(d_weight), &d_weight));
  CHK(clSetKernelArg(kern, arg++, sizeof(double), &omega));
  CHK(clSetKernelArg(kern, arg++, sizeof(double), &f0));
  CHK(clSetKernelArg(kern, arg++, sizeof(double4_aligned), &f1234));
  CHK(clSetKernelArg(kern, arg++, sizeof(double4_aligned), &f5678));

  size_t global = 1, local = 1;
  CHK(clEnqueueNDRangeKernel(queue, kern, 1, nullptr, &global, &local, 0, nullptr, nullptr));

  double h_out[20];
  CHK(clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, 20 * sizeof(double), h_out, 0, nullptr, nullptr));

  // out[3..10] = dirX.s0..s7, out[11..18] = dirY.s0..s7
  double expX[] = {1.0, 0.0, -1.0, 0.0, 1.0, -1.0, -1.0, 1.0};
  double expY[] = {0.0, 1.0, 0.0, -1.0, 1.0, 1.0, -1.0, -1.0};

  bool pass = true;
  for (int i = 0; i < 8; i++) {
    if (h_out[3 + i] != expX[i]) {
      printf("dirX.s%d = %+.1f  (expected %+.1f) MISMATCH\n", i, h_out[3+i], expX[i]);
      pass = false;
    }
    if (h_out[11 + i] != expY[i]) {
      printf("dirY.s%d = %+.1f  (expected %+.1f) MISMATCH\n", i, h_out[11+i], expY[i]);
      pass = false;
    }
  }
  printf("%s\n", pass ? "PASS" : "FAIL");

  clReleaseMemObject(d_out);
  clReleaseMemObject(d_weight);
  clReleaseKernel(kern);
  clReleaseProgram(prog);
  clReleaseCommandQueue(queue);
  clReleaseContext(ctx);
  free(spv_data);
  return pass ? 0 : 1;
}

SPIR-V Assembly (repro.spvasm)

The SPIR-V module has a kernel entry point that calls an inner function. The inner function takes two byval align 64 structs (each containing 8 doubles), plus additional byval double4 vector arguments. It loads all struct fields and uses them in arithmetic, then stores them to an output buffer.

Full SPIR-V assembly (click to expand, 440 lines)
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 255
; Schema: 0
               OpCapability Addresses
               OpCapability Linkage
               OpCapability Kernel
               OpCapability Float64
               OpCapability Int64
               OpCapability GenericPointer
               OpCapability Int8
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %241 "_Z9lbmKernelPd7double8S0_PKddd15HIP_vector_typeIdLj4EES4_"
               OpEntryPoint Kernel %252 "__chip_reset_non_symbols"
               OpSource OpenCL_C 200000
               OpName %__hip_cuid_5719198a7d94f6f8 "__hip_cuid_5719198a7d94f6f8"
               OpName %__chipspv_device_heap "__chipspv_device_heap"
               OpName %struct_double8 "struct.double8"
               OpName %struct_HIP_vector_type "struct.HIP_vector_type"
               OpName %struct_HIP_vector_base "struct.HIP_vector_base"
               OpName %union_anon_0 "union.anon.0"
               OpName %out_coerce "out.coerce"
               OpName %dirX "dirX"
               OpName %dirY "dirY"
               OpName %weight_coerce "weight.coerce"
               OpName %omega "omega"
               OpName %f0 "f0"
               OpName %f1234 "f1234"
               OpName %f5678 "f5678"
               OpName %entry "entry"
               OpName %add "add"
               OpName %y "y"
               OpName %y4 "y4"
               OpName %add5 "add5"
               OpName %z "z"
               OpName %z6 "z6"
               OpName %add7 "add7"
               OpName %w "w"
               OpName %w8 "w8"
               OpName %add9 "add9"
               OpName %add11 "add11"
               OpName %add13 "add13"
               OpName %add15 "add15"
               OpName %add17 "add17"
               OpName %s1 "s1"
               OpName %s2 "s2"
               OpName %s3 "s3"
               OpName %s4 "s4"
               OpName %s5 "s5"
               OpName %s6 "s6"
               OpName %s7 "s7"
               OpName %s119 "s119"
               OpName %s220 "s220"
               OpName %s321 "s321"
               OpName %s422 "s422"
               OpName %s523 "s523"
               OpName %s624 "s624"
               OpName %s725 "s725"
               OpName %mul "mul"
               OpName %mul30 "mul30"
               OpName %add31 "add31"
               OpName %mul34 "mul34"
               OpName %mul37 "mul37"
               OpName %add38 "add38"
               OpName %add39 "add39"
               OpName %mul42 "mul42"
               OpName %mul45 "mul45"
               OpName %add46 "add46"
               OpName %add47 "add47"
               OpName %mul50 "mul50"
               OpName %mul53 "mul53"
               OpName %add54 "add54"
               OpName %add55 "add55"
               OpName %div "div"
               OpName %mul58 "mul58"
               OpName %mul61 "mul61"
               OpName %add62 "add62"
               OpName %mul65 "mul65"
               OpName %mul68 "mul68"
               OpName %add69 "add69"
               OpName %add70 "add70"
               OpName %mul73 "mul73"
               OpName %mul76 "mul76"
               OpName %add77 "add77"
               OpName %add78 "add78"
               OpName %mul81 "mul81"
               OpName %mul84 "mul84"
               OpName %add85 "add85"
               OpName %add86 "add86"
               OpName %div87 "div87"
               OpName %arrayidx88 "arrayidx88"
               OpName %arrayidx89 "arrayidx89"
               OpName %arrayidx91 "arrayidx91"
               OpName %arrayidx93 "arrayidx93"
               OpName %arrayidx95 "arrayidx95"
               OpName %arrayidx97 "arrayidx97"
               OpName %arrayidx99 "arrayidx99"
               OpName %arrayidx101 "arrayidx101"
               OpName %arrayidx103 "arrayidx103"
               OpName %arrayidx105 "arrayidx105"
               OpName %arrayidx107 "arrayidx107"
               OpName %arrayidx109 "arrayidx109"
               OpName %arrayidx111 "arrayidx111"
               OpName %arrayidx113 "arrayidx113"
               OpName %arrayidx115 "arrayidx115"
               OpName %arrayidx117 "arrayidx117"
               OpName %arrayidx119 "arrayidx119"
               OpName %arrayidx121 "arrayidx121"
               OpName %entry_0 "entry"
               OpName %out_coerce_0 "out.coerce"
               OpName %dirX_0 "dirX"
               OpName %dirY_0 "dirY"
               OpName %weight_coerce_0 "weight.coerce"
               OpName %omega_0 "omega"
               OpName %f0_0 "f0"
               OpName %f1234_0 "f1234"
               OpName %f5678_0 "f5678"
               OpDecorate %__hip_cuid_5719198a7d94f6f8 LinkageAttributes "__hip_cuid_5719198a7d94f6f8" Export
               OpDecorate %__chipspv_device_heap LinkageAttributes "__chipspv_device_heap" Export
               OpDecorate %__chipspv_device_heap Alignment 8
               OpDecorate %dirX FuncParamAttr ByVal
               OpDecorate %dirX FuncParamAttr NoCapture
               OpDecorate %dirX FuncParamAttr NoWrite
               OpDecorate %dirX Alignment 64
               OpDecorate %dirY FuncParamAttr ByVal
               OpDecorate %dirY FuncParamAttr NoCapture
               OpDecorate %dirY FuncParamAttr NoWrite
               OpDecorate %dirY Alignment 64
               OpDecorate %weight_coerce FuncParamAttr NoAlias
               OpDecorate %weight_coerce FuncParamAttr NoCapture
               OpDecorate %weight_coerce FuncParamAttr NoReadWrite
               OpDecorate %f1234 FuncParamAttr ByVal
               OpDecorate %f1234 FuncParamAttr NoCapture
               OpDecorate %f1234 FuncParamAttr NoWrite
               OpDecorate %f1234 Alignment 32
               OpDecorate %f5678 FuncParamAttr ByVal
               OpDecorate %f5678 FuncParamAttr NoCapture
               OpDecorate %f5678 FuncParamAttr NoWrite
               OpDecorate %f5678 Alignment 32
               OpDecorate %dirX_0 FuncParamAttr ByVal
               OpDecorate %dirX_0 FuncParamAttr NoCapture
               OpDecorate %dirX_0 FuncParamAttr NoWrite
               OpDecorate %dirX_0 Alignment 64
               OpDecorate %dirY_0 FuncParamAttr ByVal
               OpDecorate %dirY_0 FuncParamAttr NoCapture
               OpDecorate %dirY_0 FuncParamAttr NoWrite
               OpDecorate %dirY_0 Alignment 64
               OpDecorate %weight_coerce_0 FuncParamAttr NoAlias
               OpDecorate %weight_coerce_0 FuncParamAttr NoCapture
               OpDecorate %weight_coerce_0 FuncParamAttr NoReadWrite
               OpDecorate %f1234_0 FuncParamAttr ByVal
               OpDecorate %f1234_0 FuncParamAttr NoCapture
               OpDecorate %f1234_0 FuncParamAttr NoWrite
               OpDecorate %f1234_0 Alignment 32
               OpDecorate %f5678_0 FuncParamAttr ByVal
               OpDecorate %f5678_0 FuncParamAttr NoCapture
               OpDecorate %f5678_0 FuncParamAttr NoWrite
               OpDecorate %f5678_0 Alignment 32
      %uchar = OpTypeInt 8 0
      %ulong = OpTypeInt 64 0
    %uchar_0 = OpConstant %uchar 0
    %ulong_8 = OpConstant %ulong 8
   %ulong_16 = OpConstant %ulong 16
   %ulong_24 = OpConstant %ulong 24
   %ulong_32 = OpConstant %ulong 32
   %ulong_40 = OpConstant %ulong 40
   %ulong_48 = OpConstant %ulong 48
   %ulong_56 = OpConstant %ulong 56
   %ulong_64 = OpConstant %ulong 64
   %ulong_72 = OpConstant %ulong 72
   %ulong_80 = OpConstant %ulong 80
   %ulong_88 = OpConstant %ulong 88
   %ulong_96 = OpConstant %ulong 96
  %ulong_104 = OpConstant %ulong 104
  %ulong_112 = OpConstant %ulong 112
  %ulong_120 = OpConstant %ulong 120
  %ulong_128 = OpConstant %ulong 128
  %ulong_136 = OpConstant %ulong 136
  %ulong_144 = OpConstant %ulong 144
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%_ptr_CrossWorkgroup__ptr_Generic_uchar = OpTypePointer CrossWorkgroup %_ptr_Generic_uchar
       %void = OpTypeVoid
     %double = OpTypeFloat 64
%struct_double8 = OpTypeStruct %double %double %double %double %double %double %double %double
%_ptr_Function_struct_double8 = OpTypePointer Function %struct_double8
   %v4double = OpTypeVector %double 4
%union_anon_0 = OpTypeStruct %v4double
%struct_HIP_vector_base = OpTypeStruct %union_anon_0
%struct_HIP_vector_type = OpTypeStruct %struct_HIP_vector_base
%_ptr_Function_struct_HIP_vector_type = OpTypePointer Function %struct_HIP_vector_type
         %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_Function_struct_double8 %_ptr_Function_struct_double8 %_ptr_CrossWorkgroup_uchar %double %double %_ptr_Function_struct_HIP_vector_type %_ptr_Function_struct_HIP_vector_type
%_ptr_Generic_double = OpTypePointer Generic %double
%_ptr_Function_double = OpTypePointer Function %double
%_ptr_Function_uchar = OpTypePointer Function %uchar
        %238 = OpTypeFunction %void
%__hip_cuid_5719198a7d94f6f8 = OpVariable %_ptr_CrossWorkgroup_uchar CrossWorkgroup %uchar_0
          %8 = OpConstantNull %_ptr_Generic_uchar
%__chipspv_device_heap = OpVariable %_ptr_CrossWorkgroup__ptr_Generic_uchar CrossWorkgroup %8
; ---- Inner function (no Export linkage → IGC will inline this) ----
         %22 = OpFunction %void None %21
 %out_coerce = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
       %dirX = OpFunctionParameter %_ptr_Function_struct_double8
       %dirY = OpFunctionParameter %_ptr_Function_struct_double8
%weight_coerce = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
      %omega = OpFunctionParameter %double
         %f0 = OpFunctionParameter %double
      %f1234 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
      %f5678 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
      %entry = OpLabel
; [... kernel body: loads struct fields, does arithmetic, stores results ...]
; (Full body in attached SPIR-V assembly)
         %33 = OpConvertPtrToU %ulong %out_coerce
         %35 = OpConvertUToPtr %_ptr_Generic_double %33
         %37 = OpBitcast %_ptr_Function_double %f1234
         %38 = OpLoad %double %37 Aligned 32
         %39 = OpBitcast %_ptr_Function_double %f5678
         %40 = OpLoad %double %39 Aligned 32
        %add = OpFAdd %double %38 %40
         %43 = OpBitcast %_ptr_Function_uchar %f1234
          %y = OpInBoundsPtrAccessChain %_ptr_Function_uchar %43 %ulong_8
         %46 = OpBitcast %_ptr_Function_double %y
         %47 = OpLoad %double %46 Aligned 8
         %48 = OpBitcast %_ptr_Function_uchar %f5678
         %y4 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %48 %ulong_8
         %50 = OpBitcast %_ptr_Function_double %y4
         %51 = OpLoad %double %50 Aligned 8
       %add5 = OpFAdd %double %47 %51
         %53 = OpBitcast %_ptr_Function_uchar %f1234
          %z = OpInBoundsPtrAccessChain %_ptr_Function_uchar %53 %ulong_16
         %56 = OpBitcast %_ptr_Function_double %z
         %57 = OpLoad %double %56 Aligned 16
         %58 = OpBitcast %_ptr_Function_uchar %f5678
         %z6 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %58 %ulong_16
         %60 = OpBitcast %_ptr_Function_double %z6
         %61 = OpLoad %double %60 Aligned 16
       %add7 = OpFAdd %double %57 %61
         %63 = OpBitcast %_ptr_Function_uchar %f1234
          %w = OpInBoundsPtrAccessChain %_ptr_Function_uchar %63 %ulong_24
         %66 = OpBitcast %_ptr_Function_double %w
         %67 = OpLoad %double %66 Aligned 8
         %68 = OpBitcast %_ptr_Function_uchar %f5678
         %w8 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %68 %ulong_24
         %70 = OpBitcast %_ptr_Function_double %w8
         %71 = OpLoad %double %70 Aligned 8
       %add9 = OpFAdd %double %67 %71
      %add11 = OpFAdd %double %f0 %add
      %add13 = OpFAdd %double %add11 %add5
      %add15 = OpFAdd %double %add13 %add7
      %add17 = OpFAdd %double %add15 %add9
         %77 = OpBitcast %_ptr_Function_double %dirX
         %78 = OpLoad %double %77 Aligned 64
         %79 = OpBitcast %_ptr_Function_uchar %dirX
         %s1 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %79 %ulong_8
         %81 = OpBitcast %_ptr_Function_double %s1
         %82 = OpLoad %double %81 Aligned 8
         %83 = OpBitcast %_ptr_Function_uchar %dirX
         %s2 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %83 %ulong_16
         %85 = OpBitcast %_ptr_Function_double %s2
         %86 = OpLoad %double %85 Aligned 16
         %87 = OpBitcast %_ptr_Function_uchar %dirX
         %s3 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %87 %ulong_24
         %89 = OpBitcast %_ptr_Function_double %s3
         %90 = OpLoad %double %89 Aligned 8
         %91 = OpBitcast %_ptr_Function_uchar %dirX
         %s4 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %91 %ulong_32
         %94 = OpBitcast %_ptr_Function_double %s4
         %95 = OpLoad %double %94 Aligned 32
         %96 = OpBitcast %_ptr_Function_uchar %dirX
         %s5 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %96 %ulong_40
         %99 = OpBitcast %_ptr_Function_double %s5
        %100 = OpLoad %double %99 Aligned 8
        %101 = OpBitcast %_ptr_Function_uchar %dirX
         %s6 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %101 %ulong_48
        %104 = OpBitcast %_ptr_Function_double %s6
        %105 = OpLoad %double %104 Aligned 16
        %106 = OpBitcast %_ptr_Function_uchar %dirX
         %s7 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %106 %ulong_56
        %109 = OpBitcast %_ptr_Function_double %s7
        %110 = OpLoad %double %109 Aligned 8
        %111 = OpBitcast %_ptr_Function_double %dirY
        %112 = OpLoad %double %111 Aligned 64
        %113 = OpBitcast %_ptr_Function_uchar %dirY
       %s119 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %113 %ulong_8
        %115 = OpBitcast %_ptr_Function_double %s119
        %116 = OpLoad %double %115 Aligned 8
        %117 = OpBitcast %_ptr_Function_uchar %dirY
       %s220 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %117 %ulong_16
        %119 = OpBitcast %_ptr_Function_double %s220
        %120 = OpLoad %double %119 Aligned 16
        %121 = OpBitcast %_ptr_Function_uchar %dirY
       %s321 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %121 %ulong_24
        %123 = OpBitcast %_ptr_Function_double %s321
        %124 = OpLoad %double %123 Aligned 8
        %125 = OpBitcast %_ptr_Function_uchar %dirY
       %s422 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %125 %ulong_32
        %127 = OpBitcast %_ptr_Function_double %s422
        %128 = OpLoad %double %127 Aligned 32
        %129 = OpBitcast %_ptr_Function_uchar %dirY
       %s523 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %129 %ulong_40
        %131 = OpBitcast %_ptr_Function_double %s523
        %132 = OpLoad %double %131 Aligned 8
        %133 = OpBitcast %_ptr_Function_uchar %dirY
       %s624 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %133 %ulong_48
        %135 = OpBitcast %_ptr_Function_double %s624
        %136 = OpLoad %double %135 Aligned 16
        %137 = OpBitcast %_ptr_Function_uchar %dirY
       %s725 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %137 %ulong_56
        %139 = OpBitcast %_ptr_Function_double %s725
        %140 = OpLoad %double %139 Aligned 8
        %mul = OpFMul %double %38 %78
      %mul30 = OpFMul %double %47 %82
      %add31 = OpFAdd %double %mul %mul30
      %mul34 = OpFMul %double %57 %86
      %mul37 = OpFMul %double %67 %90
      %add38 = OpFAdd %double %mul34 %mul37
      %add39 = OpFAdd %double %add31 %add38
      %mul42 = OpFMul %double %40 %95
      %mul45 = OpFMul %double %51 %100
      %add46 = OpFAdd %double %mul42 %mul45
      %add47 = OpFAdd %double %add39 %add46
      %mul50 = OpFMul %double %61 %105
      %mul53 = OpFMul %double %71 %110
      %add54 = OpFAdd %double %mul50 %mul53
      %add55 = OpFAdd %double %add47 %add54
        %div = OpFDiv %double %add55 %add17
      %mul58 = OpFMul %double %38 %112
      %mul61 = OpFMul %double %47 %116
      %add62 = OpFAdd %double %mul58 %mul61
      %mul65 = OpFMul %double %57 %120
      %mul68 = OpFMul %double %67 %124
      %add69 = OpFAdd %double %mul65 %mul68
      %add70 = OpFAdd %double %add62 %add69
      %mul73 = OpFMul %double %40 %128
      %mul76 = OpFMul %double %51 %132
      %add77 = OpFAdd %double %mul73 %mul76
      %add78 = OpFAdd %double %add70 %add77
      %mul81 = OpFMul %double %61 %136
      %mul84 = OpFMul %double %71 %140
      %add85 = OpFAdd %double %mul81 %mul84
      %add86 = OpFAdd %double %add78 %add85
      %div87 = OpFDiv %double %add86 %add17
               OpStore %35 %add17 Aligned 8
        %173 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx88 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %173 %ulong_8
        %175 = OpBitcast %_ptr_Generic_double %arrayidx88
               OpStore %175 %div Aligned 8
        %176 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx89 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %176 %ulong_16
        %178 = OpBitcast %_ptr_Generic_double %arrayidx89
               OpStore %178 %div87 Aligned 8
        %179 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx91 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %179 %ulong_24
        %181 = OpBitcast %_ptr_Generic_double %arrayidx91
               OpStore %181 %78 Aligned 8
        %182 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx93 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %182 %ulong_32
        %184 = OpBitcast %_ptr_Generic_double %arrayidx93
               OpStore %184 %82 Aligned 8
        %185 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx95 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %185 %ulong_40
        %187 = OpBitcast %_ptr_Generic_double %arrayidx95
               OpStore %187 %86 Aligned 8
        %188 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx97 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %188 %ulong_48
        %190 = OpBitcast %_ptr_Generic_double %arrayidx97
               OpStore %190 %90 Aligned 8
        %191 = OpBitcast %_ptr_Generic_uchar %35
 %arrayidx99 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %191 %ulong_56
        %193 = OpBitcast %_ptr_Generic_double %arrayidx99
               OpStore %193 %95 Aligned 8
        %194 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx101 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %194 %ulong_64
        %197 = OpBitcast %_ptr_Generic_double %arrayidx101
               OpStore %197 %100 Aligned 8
        %198 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx103 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %198 %ulong_72
        %201 = OpBitcast %_ptr_Generic_double %arrayidx103
               OpStore %201 %105 Aligned 8
        %202 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx105 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %202 %ulong_80
        %205 = OpBitcast %_ptr_Generic_double %arrayidx105
               OpStore %205 %110 Aligned 8
        %206 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx107 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %206 %ulong_88
        %209 = OpBitcast %_ptr_Generic_double %arrayidx107
               OpStore %209 %112 Aligned 8
        %210 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx109 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %210 %ulong_96
        %213 = OpBitcast %_ptr_Generic_double %arrayidx109
               OpStore %213 %116 Aligned 8
        %214 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx111 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %214 %ulong_104
        %217 = OpBitcast %_ptr_Generic_double %arrayidx111
               OpStore %217 %120 Aligned 8
        %218 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx113 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %218 %ulong_112
        %221 = OpBitcast %_ptr_Generic_double %arrayidx113
               OpStore %221 %124 Aligned 8
        %222 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx115 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %222 %ulong_120
        %225 = OpBitcast %_ptr_Generic_double %arrayidx115
               OpStore %225 %128 Aligned 8
        %226 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx117 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %226 %ulong_128
        %229 = OpBitcast %_ptr_Generic_double %arrayidx117
               OpStore %229 %132 Aligned 8
        %230 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx119 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %230 %ulong_136
        %233 = OpBitcast %_ptr_Generic_double %arrayidx119
               OpStore %233 %136 Aligned 8
        %234 = OpBitcast %_ptr_Generic_uchar %35
%arrayidx121 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %234 %ulong_144
        %237 = OpBitcast %_ptr_Generic_double %arrayidx121
               OpStore %237 %140 Aligned 8
               OpReturn
               OpFunctionEnd
; ---- Reset function ----
        %239 = OpFunction %void None %238
    %entry_0 = OpLabel
               OpStore %__hip_cuid_5719198a7d94f6f8 %uchar_0 Aligned 1
               OpReturn
               OpFunctionEnd
; ---- Entry point: calls inner function ----
        %241 = OpFunction %void None %21
%out_coerce_0 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
     %dirX_0 = OpFunctionParameter %_ptr_Function_struct_double8
     %dirY_0 = OpFunctionParameter %_ptr_Function_struct_double8
%weight_coerce_0 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
    %omega_0 = OpFunctionParameter %double
       %f0_0 = OpFunctionParameter %double
    %f1234_0 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
    %f5678_0 = OpFunctionParameter %_ptr_Function_struct_HIP_vector_type
        %250 = OpLabel
        %251 = OpFunctionCall %void %22 %out_coerce_0 %dirX_0 %dirY_0 %weight_coerce_0 %omega_0 %f0_0 %f1234_0 %f5678_0
               OpReturn
               OpFunctionEnd
; ---- Entry point for reset ----
        %252 = OpFunction %void None %238
        %253 = OpLabel
        %254 = OpFunctionCall %void %239
               OpReturn
               OpFunctionEnd

Workaround

Adding Export linkage to the inner function prevents IGC from inlining it:

+ OpDecorate %22 LinkageAttributes "inner_impl" Export

After this one-line change, the SPIR-V assembles, validates, and produces correct results.

Analysis

The SPIR-V module has a kernel entry point (%241) that forwards all arguments to an inner function (%22) via OpFunctionCall. When the inner function lacks Export linkage, IGC inlines it into the entry point. During this inlining, the byval align 64 struct argument accesses (via OpInBoundsPtrAccessChain on the Function-space pointer) are incorrectly compiled — alternating fields in the upper half of the struct (offsets 40, 56 for one struct; 32, 48 for the other) read as 0.

The SPIR-V passes spirv-val validation.

Impact

This blocks chipStar (HIP implementation for SPIR-V GPUs) from correctly running kernels with __align__(64) struct parameters, including the HeCBench fluidSim LBM benchmark.

Related: chipStar PR #1185, chipStar issue #1180

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions