-
Notifications
You must be signed in to change notification settings - Fork 467
Description
Hi, I have encountered the error "torch.AcceleratorError: CUDA error: an illegal memory access was encountered" on GPU.
torch_version: 2.11.0a0+eb65b36914.nv26.2
The traceback as follows:
```
/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py:124: in decorate_context
return func(*args, **kwargs)
src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py:995: in call
noise_pred = self.transformer(
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in _call_impl
return forward_call(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/accelerate/hooks.py:170: in new_forward
output = module._old_forward(*args, **kwargs)
src/diffusers/models/transformers/transformer_hidream_image.py:887: in forward
hidden_states, initial_encoder_hidden_states = block(
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in _call_impl
return forward_call(*args, **kwargs)
src/diffusers/models/transformers/transformer_hidream_image.py:597: in forward
return self.block(
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in _call_impl
return forward_call(*args, **kwargs)
src/diffusers/models/transformers/transformer_hidream_image.py:577: in forward
ff_output_i = gate_mlp_i * self.ff_i(norm_hidden_states)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in _call_impl
return forward_call(*args, **kwargs)
src/diffusers/models/transformers/transformer_hidream_image.py:386: in forward
y = y + self.shared_experts(identity)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in _call_impl
return forward_call(*args, **kwargs)
src/diffusers/models/transformers/transformer_hidream_image.py:41: in forward
return self.w2(torch.nn.functional.silu(self.w1(x)) * self.w3(x))
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1778: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py:1789: in call_impl
return forward_call(*args, **kwargs)
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/linear.py:134: in forward
return F.linear(input, self.weight, self.bias)
/usr/local/lib/python3.12/dist-packages/torchao/utils.py:637: in dispatch__torch_function
return cls._TORCH_FN_TABLE[cls][func](func, types, args, kwargs)
/usr/local/lib/python3.12/dist-packages/torchao/utils.py:440: in wrapper
return _func(f, types, args, kwargs)
/usr/local/lib/python3.12/dist-packages/torchao/quantization/linear_activation_quantized_tensor.py:146: in _
return weight_tensor._quantized_linear_op(input_tensor, weight_tensor, bias)
/usr/local/lib/python3.12/dist-packages/torchao/quantization/linear_activation_quantized_tensor.py:88: in quantized_linear_op
return torch.nn.functional.linear(
/usr/local/lib/python3.12/dist-packages/torchao/utils.py:637: in dispatch__torch_function
return cls._TORCH_FN_TABLE[cls][func](func, types, args, kwargs)
/usr/local/lib/python3.12/dist-packages/torchao/utils.py:440: in wrapper
return _func(f, types, args, kwargs)
/usr/local/lib/python3.12/dist-packages/torchao/dtypes/affine_quantized_tensor_ops.py:284: in _
return weight_tensor._quantized_linear_op(input_tensor, weight_tensor, bias)
/usr/local/lib/python3.12/dist-packages/torchao/dtypes/affine_quantized_tensor_ops.py:176: in _quantized_linear_op
return impl(input_tensor, weight_tensor, bias)
/usr/local/lib/python3.12/dist-packages/torchao/dtypes/uintx/plain_layout.py:301: in _linear_int8_act_int8_weight_impl
y_dot_scaled = int_scaled_matmul(
/usr/local/lib/python3.12/dist-packages/torchao/kernel/intmm.py:142: in int_scaled_matmul
c = safe_int_mm(a, b)
/usr/local/lib/python3.12/dist-packages/torchao/kernel/intmm.py:46: in safe_int_mm
if dynamo_is_compiling() or "FakeTensor" in input.repr():
/usr/local/lib/python3.12/dist-packages/torch/_tensor.py:574: in repr
return torch._tensor_str._str(self, tensor_contents=tensor_contents)
/usr/local/lib/python3.12/dist-packages/torch/_tensor_str.py:726: in _str
return _str_intern(self, tensor_contents=tensor_contents)
/usr/local/lib/python3.12/dist-packages/torch/_tensor_str.py:643: in _str_intern
tensor_str = _tensor_str(self, indent)
/usr/local/lib/python3.12/dist-packages/torch/_tensor_str.py:375: in _tensor_str
formatter = _Formatter(get_summarized_data(self) if summarize else self)
/usr/local/lib/python3.12/dist-packages/torch/_tensor_str.py:142: in init
value_str = f"{value}"
self = <[AcceleratorError("CUDA error: an illegal memory access was encountered\nSearch for cudaErrorIllegalAddress' in http...pile with TORCH_USE_CUDA_DSA` to enable device-side assertions.\n") raised in repr()] Tensor object at 0x7fcd1c253570>
format_spec = ''
def __format__(self, format_spec):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__format__, (self,), self, format_spec)
if self.dim() == 0 and not self.is_meta and type(self) is Tensor:
# Use detach() here to avoid the warning when converting a scalar Tensor that
# requires gradients to a python number. It is ok for formatting.
return self.detach().item().__format__(format_spec)
E torch.AcceleratorError: CUDA error: an illegal memory access was encountered
E Search for `cudaErrorIllegalAddress` in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
E CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
E For debugging consider passing CUDA_LAUNCH_BLOCKING=1
E Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
/usr/local/lib/python3.12/dist-packages/torch/_tensor.py:1150: AcceleratorError
```