diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py index a0fd6cee..1dc65cb2 100644 --- a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +++ b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py @@ -465,8 +465,14 @@ def tensor_zp_scale_from_min_max( else np.maximum(neg_clipping_values, float16_min) ) + # Include 0 in the range (bound_min <= 0 <= bound_max) to allow zero-padding. + # See: https://arxiv.org/pdf/1712.05877.pdf + # This also ensures that a tensor with zero variation (i.e., min_value == + # max_value) can be quantized correctly. + bound_max = np.maximum(max_value, np.zeros_like(max_value)) + bound_min = np.minimum(min_value, np.zeros_like(min_value)) if symmetric: - bound = np.maximum(np.abs(min_value), np.abs(max_value)) + bound = np.maximum(np.abs(bound_max), np.abs(bound_min)) bound = np.maximum(bound, min_bound) if clipping_values is not None: bound = np.clip(bound, neg_clipping_values, pos_clipping_values) @@ -478,11 +484,6 @@ def tensor_zp_scale_from_min_max( scale = bound / qmax zp = np.zeros_like(scale, dtype=np.int32) else: - # Include 0 to the range to support zero-padding. - # See: https://arxiv.org/pdf/1712.05877.pdf - # This ensures bound_min <= 0 <= bound_max. 
- bound_max = np.maximum(max_value, np.zeros_like(max_value)) - bound_min = np.minimum(min_value, np.zeros_like(min_value)) bound = np.maximum(bound_max - bound_min, min_bound) if clipping_values is not None: bound = np.clip(bound, -clipping_values, clipping_values) diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py index 9f55a559..6ea0401c 100644 --- a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +++ b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py @@ -17,10 +17,10 @@ import dataclasses -from absl.testing import parameterized import numpy as np from tensorflow.python.platform import googletest +from absl.testing import parameterized from ai_edge_quantizer import qtyping from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor @@ -464,6 +464,67 @@ def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric): # Range has to be extended to include zero. 
self.assertEqual(calculated_min, 0) + @parameterized.product( + val=[5.0, 0.0, -5.0], + num_bits=[8, 16], + symmetric=[True, False], + granularity=[ + qtyping.QuantGranularity.TENSORWISE, + qtyping.QuantGranularity.CHANNELWISE, + ], + ) + def test_tensor_zp_scale_from_min_max_same_min_max( + self, val, num_bits, symmetric, granularity + ): + if granularity == qtyping.QuantGranularity.TENSORWISE: + min_val = np.array([[val]], dtype=np.float32) + max_val = np.array([[val]], dtype=np.float32) + expected_shape = (1, 1) + else: # CHANNELWISE + min_val = np.array([val, val], dtype=np.float32) + max_val = np.array([val, val], dtype=np.float32) + expected_shape = (2,) + + zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max( + min_val, + max_val, + num_bits, + symmetric, + granularity, + ) + self.assertEqual(zp.shape, scale.shape) + self.assertEqual(zp.shape, expected_shape) + + max_q = 2**num_bits / 2 - 1 + calculated_max = scale * (max_q - zp) + min_q = -(2**num_bits) / 2 + # Narrow range for symmetric quantization with num_bits >= 8. + if symmetric and num_bits >= 8: + min_q += 1 + calculated_min = scale * (min_q - zp) + + min_bound = 1e-4 # 1e-6 precision for int8 and 1e-8 for int16. 
+ if symmetric: + bound = np.maximum(np.abs(val), min_bound) + for i in range(calculated_max.size): + self.assertAlmostEqual(calculated_max.flatten()[i], bound, delta=1e-3) + self.assertAlmostEqual(calculated_min.flatten()[i], -bound, delta=1e-3) + else: + if val == 0.0: + for i in range(calculated_max.size): + self.assertAlmostEqual( + calculated_max.flatten()[i], min_bound, delta=1e-9 + ) + self.assertAlmostEqual(calculated_min.flatten()[i], 0.0, delta=1e-9) + else: + for i in range(calculated_max.size): + self.assertAlmostEqual( + calculated_max.flatten()[i], np.maximum(val, 0), delta=1e-3 + ) + self.assertAlmostEqual( + calculated_min.flatten()[i], np.minimum(val, 0), delta=1e-3 + ) + @parameterized.parameters( # number of bits, is_symmetric, max bound of the quantized range. (4, True, 7),