Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -465,8 +465,14 @@ def tensor_zp_scale_from_min_max(
else np.maximum(neg_clipping_values, float16_min)
)

# Include 0 to the range to support zero-padding(bound_min <= 0 <= bound_max).
# See: https://arxiv.org/pdf/1712.05877.pdf
# This also ensures that tensor with 0 variation (i.e., min_value ==
# max_value) can be quantized correctly.
bound_max = np.maximum(max_value, np.zeros_like(max_value))
bound_min = np.minimum(min_value, np.zeros_like(min_value))
if symmetric:
bound = np.maximum(np.abs(min_value), np.abs(max_value))
bound = np.maximum(np.abs(bound_max), np.abs(bound_min))
bound = np.maximum(bound, min_bound)
if clipping_values is not None:
bound = np.clip(bound, neg_clipping_values, pos_clipping_values)
Expand All @@ -478,11 +484,6 @@ def tensor_zp_scale_from_min_max(
scale = bound / qmax
zp = np.zeros_like(scale, dtype=np.int32)
else:
# Include 0 to the range to support zero-padding.
# See: https://arxiv.org/pdf/1712.05877.pdf
# This ensures bound_min <= 0 <= bound_max.
bound_max = np.maximum(max_value, np.zeros_like(max_value))
bound_min = np.minimum(min_value, np.zeros_like(min_value))
bound = np.maximum(bound_max - bound_min, min_bound)
if clipping_values is not None:
bound = np.clip(bound, -clipping_values, clipping_values)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

import dataclasses

from absl.testing import parameterized
import numpy as np

from tensorflow.python.platform import googletest
from absl.testing import parameterized
from ai_edge_quantizer import qtyping
from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor

Expand Down Expand Up @@ -464,6 +464,67 @@ def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
# Range has to be extended to include zero.
self.assertEqual(calculated_min, 0)

@parameterized.product(
    val=[5.0, 0.0, -5.0],
    num_bits=[8, 16],
    symmetric=[True, False],
    granularity=[
        qtyping.QuantGranularity.TENSORWISE,
        qtyping.QuantGranularity.CHANNELWISE,
    ],
)
def test_tensor_zp_scale_from_min_max_same_min_max(
    self, val, num_bits, symmetric, granularity
):
  """Degenerate ranges (min == max) must still quantize sensibly.

  When a tensor has zero variation the observed min/max collapse to a
  single point; the returned (zp, scale) must still describe a finite,
  nonzero range that contains zero.
  """
  # Build min/max statistics that collapse to a single value, shaped per
  # granularity: (1, 1) for tensorwise, one entry per channel otherwise.
  if granularity == qtyping.QuantGranularity.TENSORWISE:
    stats_shape = (1, 1)
  else:  # CHANNELWISE: two channels, both degenerate.
    stats_shape = (2,)
  min_val = np.full(stats_shape, val, dtype=np.float32)
  max_val = np.full(stats_shape, val, dtype=np.float32)

  zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
      min_val,
      max_val,
      num_bits,
      symmetric,
      granularity,
  )
  self.assertEqual(zp.shape, scale.shape)
  self.assertEqual(zp.shape, stats_shape)

  # Reconstruct the representable float range implied by (zp, scale).
  qmax = 2 ** (num_bits - 1) - 1
  qmin = -(2 ** (num_bits - 1))
  # Narrow range for symmetric quantization with num_bits >= 8 drops the
  # most-negative quantized code.
  if symmetric and num_bits >= 8:
    qmin += 1
  dequant_max = scale * (qmax - zp)
  dequant_min = scale * (qmin - zp)

  min_bound = 1e-4  # 1e-6 precision for int8 and 1e-8 for int16.
  if symmetric:
    expected = max(abs(val), min_bound)
    for hi, lo in zip(dequant_max.flatten(), dequant_min.flatten()):
      self.assertAlmostEqual(hi, expected, delta=1e-3)
      self.assertAlmostEqual(lo, -expected, delta=1e-3)
  elif val == 0.0:
    # Asymmetric with a degenerate all-zero range: the range is widened
    # to [0, min_bound] so the scale stays nonzero.
    for hi, lo in zip(dequant_max.flatten(), dequant_min.flatten()):
      self.assertAlmostEqual(hi, min_bound, delta=1e-9)
      self.assertAlmostEqual(lo, 0.0, delta=1e-9)
  else:
    # Asymmetric: zero is pulled into the range, so one endpoint is val
    # and the other is clamped to zero.
    for hi, lo in zip(dequant_max.flatten(), dequant_min.flatten()):
      self.assertAlmostEqual(hi, max(val, 0.0), delta=1e-3)
      self.assertAlmostEqual(lo, min(val, 0.0), delta=1e-3)

@parameterized.parameters(
# number of bits, is_symmetric, max bound of the quantized range.
(4, True, 7),
Expand Down