diff options
author | James Peet <james.peet@arm.com> | 2021-07-19 16:47:58 +0100 |
---|---|---|
committer | James Peet <james.peet@arm.com> | 2021-07-26 12:05:57 +0100 |
commit | 7519d50c993d60faa1ea09e56abfbf17cef23b49 (patch) | |
tree | 18bc526a1d1146dc3061f95418b1345bbdc481e8 /ethosu/vela | |
parent | daed1529848465aab221a30008f455f9fa03c8d4 (diff) | |
download | ethos-u-vela-7519d50c993d60faa1ea09e56abfbf17cef23b49.tar.gz |
MLBEDSW-4892: Fix crash affecting biases without quantization.
Remove quant_values attribute from Tensor class.
It only needs a single values attribute, holding either
quantized or unquantized values as appropriate.
Change-Id: Ie96f80ac58061b6077e0f7048dc60209fdfbcafa
Signed-off-by: James Peet <james.peet@arm.com>
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/data_type.py | 12 | ||||
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 3 | ||||
-rw-r--r-- | ethosu/vela/npu_serialisation.py | 2 | ||||
-rw-r--r-- | ethosu/vela/reader_util.py | 3 | ||||
-rw-r--r-- | ethosu/vela/supported_operators.py | 6 | ||||
-rw-r--r-- | ethosu/vela/tensor.py | 35 | ||||
-rw-r--r-- | ethosu/vela/test/test_graph_optimiser.py | 6 | ||||
-rw-r--r-- | ethosu/vela/test/test_supported_operators.py | 2 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 46 | ||||
-rw-r--r-- | ethosu/vela/tflite_reader.py | 3 | ||||
-rw-r--r-- | ethosu/vela/tflite_writer.py | 4 | ||||
-rw-r--r-- | ethosu/vela/tosa_reader.py | 1 | ||||
-rw-r--r-- | ethosu/vela/weight_compressor.py | 8 |
13 files changed, 58 insertions, 73 deletions
diff --git a/ethosu/vela/data_type.py b/ethosu/vela/data_type.py index 07086d6b..470504d2 100644 --- a/ethosu/vela/data_type.py +++ b/ethosu/vela/data_type.py @@ -18,6 +18,8 @@ import enum from typing import Any +import numpy as np + from .numeric_util import round_up_divide @@ -99,6 +101,16 @@ class DataType: __repr__ = __str__ + def as_numpy_type(self): + numpy_dtype_code = { + BaseType.UnsignedInt: "u", + BaseType.SignedInt: "i", + BaseType.Float: "f", + BaseType.Complex: "c", + } + assert self.type in numpy_dtype_code, f"Failed to interpret {self} as a numpy dtype" + return np.dtype(numpy_dtype_code[self.type] + str(self.size_in_bytes())) + stem_name = { BaseType.UnsignedInt: ("uint%s", True), BaseType.SignedInt: ("int%s", True), diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 80d0e476..9b76ec1f 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -437,8 +437,7 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor) if cmd.ifm2_tensor.shape == []: # scalar - assert cmd.ifm2_tensor.quant_values.size == 1 - npu_op.ifm2_scalar = cmd.ifm2_tensor.values.item(0) + npu_op.ifm2_scalar = cmd.ifm2_tensor.get_scalar() npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0) else: ifm2_blk = cmd.ifm2_box.get_block() diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py index 06ea61df..ea35ac60 100644 --- a/ethosu/vela/npu_serialisation.py +++ b/ethosu/vela/npu_serialisation.py @@ -48,7 +48,7 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor): def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor): start_addr = src_tensor.address - values = src_tensor.quant_values.flatten() if src_tensor.quant_values is not None else src_tensor.values.flatten() + values = src_tensor.values.flatten() if src_tensor.dtype.size_in_bytes() > 1: values = np.frombuffer(values.tobytes(), dtype=np.uint8) end_addr = start_addr + values.size diff --git a/ethosu/vela/reader_util.py b/ethosu/vela/reader_util.py index 233286c8..476b70aa 100644 --- a/ethosu/vela/reader_util.py +++ b/ethosu/vela/reader_util.py @@ -34,9 +34,6 @@ def clone_and_reshape_tensor(src_tens, reorder, set_unique): if tens.values is not None: tens.values = tens.values.transpose(reorder) - if tens.quant_values is not None: - tens.quant_values = tens.quant_values.transpose(reorder) - op = Operation(Op.Const, tens.name) op.set_output_tensor(tens) return tens diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py index c993da13..663c78f8 100644 --- a/ethosu/vela/supported_operators.py +++ b/ethosu/vela/supported_operators.py @@ -532,7 +532,7 @@ class SupportedOperators: def constraint_weights_limit(cls, op): "The sum of the weights cannot exceed {}" weights = op.weights - values = weights.quant_values.astype(np.int64) - weights.quantization.zero_point + values = weights.values.astype(np.int64) - weights.quantization.zero_point limit = np.amax(np.sum(np.absolute(values), axis=(0, 1, 2))) valid = limit <= cls.weights_limit return valid, f"Tensor '{weights.name}' has the sum of weights: {limit}" @@ -551,8 +551,8 @@ class SupportedOperators: def constraint_bias_40bit(op): "Optional Bias tensor values must fit within 40-bits" bias = op.bias - if bias and bias.dtype == DataType.int64 and bias.quant_values is not None: - valid = all(len(bin(quant_value)[2:]) <= 40 for quant_value in bias.quant_values) + if bias and bias.dtype == DataType.int64 and bias.values is not None: + valid = all(len(bin(quant_value)[2:]) <= 40 for quant_value in bias.values) return valid, f"Tensor '{bias.name}' has values larger than 40-bits" return True, "Op has no bias tensor, or it fits in 40-bit" diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index 7dbdcddf..677757ca 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -254,20 +254,8 @@ class QuantizationParameters: res.quant_max = self.quant_max return res - def dequantize(self, values): - if self.zero_point.size == 1 and self.scale_f32.size == 1: - # same scale is used for all values - res = (values.astype(np.float64) - self.zero_point) * self.scale_f32 - else: - # a different scale is used for different sets of values - values_as_float = values.astype(np.float64) - - # this is not compatible with the format of depthwise weights, - # where input is at index 3 (Output, Kh, Kw, Input) - # return the quantized values - return np.ndarray((values_as_float.shape)) - - return res + def dequantize(self, values) -> np.ndarray: + return np.subtract(values, self.zero_point) * self.scale_f32 def is_scaling_equal(self, other: Optional["QuantizationParameters"]) -> bool: # quantisation parameter scaling is not equal if 'other' is None because @@ -300,16 +288,12 @@ def create_const_tensor( value_dtype: np.dtype = None, purpose: TensorPurpose = TensorPurpose.Unknown, quantization: QuantizationParameters = None, - quant_value_dtype: np.dtype = None, ): # Tensor const_tensor = Tensor(shape, dtype, name + "_0") const_tensor.purpose = purpose const_tensor.quantization = quantization const_tensor.values = np.array(values, dtype=value_dtype) - const_tensor.quant_values = np.frombuffer( - const_tensor.values.tobytes(), dtype=np.uint8 if not quant_value_dtype else quant_value_dtype - ) # Operator const_op = Operation(Op.Const, name) const_op.set_output_tensor(const_tensor) @@ -349,7 +333,6 @@ class Tensor: "ops", "consumer_list", "values", - "quant_values", "compressed_values", "compressed_values_substream_offsets", "mem_area", @@ -391,8 +374,7 @@ class Tensor: self.ops: List[Operation] = [] self.consumer_list: List[Operation] = [] - self.values: Optional[np.ndarray] = None - self.quant_values: Optional[np.ndarray] = None + self.values: Optional[np.ndarray] = None # elements are of type self.dtype self.compressed_values: Optional[np.ndarray] = None self.compressed_values_substream_offsets: Optional[List] = None self.mem_area: MemArea = MemArea.Unknown @@ -816,6 +798,17 @@ class Tensor: return (self.dtype.type & BaseType.Int) != 0 and self.quantization.is_valid() + def get_scalar(self): + """ + return: Unquantized or dequantized scalar value + rtype: self.dtype (if unquantized) or float (if dequantized) + """ + assert self.values.size == 1, "get_scalar called on non-scalar tensor" + if self.is_quantized(): + return self.quantization.dequantize(self.values).item(0) + else: + return self.values.item(0) + def __lt__(self, other: "Tensor") -> bool: return self.equivalence_id < other.equivalence_id diff --git a/ethosu/vela/test/test_graph_optimiser.py b/ethosu/vela/test/test_graph_optimiser.py index b37bac80..e0eedd66 100644 --- a/ethosu/vela/test/test_graph_optimiser.py +++ b/ethosu/vela/test/test_graph_optimiser.py @@ -139,8 +139,7 @@ def create_pad_and_conv2d( conv_out_tens = Tensor(in_shape, in_dtype, "output") conv_out_tens.quantization = qp.clone() weight_tens = Tensor([kernel_size, kernel_size, in_shape[-1], out_shape[-1]], in_dtype, "weights") - weight_tens.values = np.zeros(weight_tens.shape) - weight_tens.quant_values = np.zeros(weight_tens.shape, np.int8) + weight_tens.values = np.zeros(weight_tens.shape, in_dtype.as_numpy_type()) weight_tens.quantization = qp.clone() bias_tens = Tensor(out_shape, pad_dtype, "biases") attrs = {"padding": pad_setting, "stride_w": 2, "stride_h": 2, "dilation_w_factor": 1, "dilation_h_factor": 1} @@ -349,8 +348,7 @@ def test_remove_reshape(): conv_ofm = Tensor([1, 8, 8, 16], DataType.uint8, "output") conv_ofm.quantization = quant.clone() weight_tens = Tensor([1, 1, 16, 16], DataType.uint8, "weights") - weight_tens.values = np.zeros(weight_tens.shape) - weight_tens.quant_values = np.zeros(weight_tens.shape, np.uint8) + weight_tens.values = np.zeros(weight_tens.shape, np.uint8) weight_tens.quantization = quant.clone() bias_tens = Tensor([16], DataType.int32, "biases") diff --git a/ethosu/vela/test/test_supported_operators.py b/ethosu/vela/test/test_supported_operators.py index 666a5ecc..38308154 100644 --- a/ethosu/vela/test/test_supported_operators.py +++ b/ethosu/vela/test/test_supported_operators.py @@ -246,7 +246,7 @@ def test_constraint_bias_40bit(): op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 1, 1, 1], [1, 1, 1, 1], weights_shape=[1, 1, 1, 1]) op.attrs = {"stride_w": 1, "stride_h": 1} bias = Tensor([1, 1, 1, 1], DataType.int64, "bias") - bias.quant_values = np.array([0x01FF_FFFF_FFFF]) + bias.values = np.array([0x01FF_FFFF_FFFF]) op.add_input_tensor(bias) assert not support.is_operator_supported(op) diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 3d9eeb8a..9fdff8ff 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -354,8 +354,7 @@ def convert_resizebilinear_1x1_to_add(op): # Create an input tensor filled with zeros shape = op.ofm_shapes[0].as_list() tens = Tensor(shape, op.inputs[0].dtype, op.inputs[1].name + "_add") - tens.values = np.zeros(shape) - tens.quant_values = np.zeros(shape, np.uint8) + tens.values = np.zeros(shape, tens.dtype.as_numpy_type()) tens.quantization = QuantizationParameters(0.0, 255.0) tens.quantization.scale_f32 = 1.0 tens.quantization.zero_point = 0 @@ -470,8 +469,8 @@ def convert_batched_fc_shape(op, arch, nng): # Reshape Weights to be 4D. IO becomes HWIO weight_tensor = op.inputs[1] - weight_tensor.quant_values = np.expand_dims(np.expand_dims(weight_tensor.quant_values, axis=0), axis=0) - weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape)) + weight_tensor.values = np.expand_dims(np.expand_dims(weight_tensor.values, axis=0), axis=0) + weight_tensor.set_all_shapes(list(weight_tensor.values.shape)) n = op.ofm_shapes[0].batch h, w = batching_split.get(n, (1, n)) @@ -608,8 +607,8 @@ def convert_depthwise_to_conv(op, arch, nng): del op.attrs["channel_multiplier"] del op.attrs["depth_multiplier"] - weight_tensor.quant_values = np.transpose(weight_tensor.quant_values, (0, 1, 3, 2)) - weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape)) + weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2)) + weight_tensor.set_all_shapes(list(weight_tensor.values.shape)) else: raise UnsupportedFeatureError( f"Unsupported 'DEPTHWISE_CONV_2D' with depth_multiplier = {op.attrs['depth_multiplier']},", @@ -622,8 +621,8 @@ def convert_depthwise_to_conv(op, arch, nng): def reorder_depthwise_weights(op, arch, nng): if op.type.is_depthwise_conv2d_op(): weight_tensor = op.inputs[1] - weight_tensor.quant_values = np.transpose(weight_tensor.quant_values, (0, 1, 3, 2)) - weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape)) + weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2)) + weight_tensor.set_all_shapes(list(weight_tensor.values.shape)) weight_tensor.weight_transpose_depthwise = True return op @@ -654,14 +653,14 @@ def optimise_strided_conv(op, arch, nng): for i in range(weight_shape[0]): padded_array[i] = np.vstack( [ - weight_tensor.quant_values[i], + weight_tensor.values[i], np.full((1, weight_shape[2], weight_shape[3]), weight_tensor.quantization.zero_point), ] ) - weight_tensor.quant_values = padded_array + weight_tensor.values = padded_array weight_shape[1] //= 2 weight_shape[2] *= 2 - weight_tensor.quant_values = np.reshape(weight_tensor.quant_values, weight_shape) + weight_tensor.values = np.reshape(weight_tensor.values, weight_shape) weight_tensor.set_all_shapes(weight_shape) # If multiple copies of the weights are used, we could avoid # them having the same address by changing the value_id @@ -692,8 +691,8 @@ def convert_conv_to_fc(op, arch, nng): } # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped) weight_tensor = op.inputs[1] - weight_tensor.quant_values = weight_tensor.quant_values.squeeze(axis=(0, 1)) - weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape)) + weight_tensor.values = weight_tensor.values.squeeze(axis=(0, 1)) + weight_tensor.set_all_shapes(list(weight_tensor.values.shape)) DebugDatabase.add_optimised(op, op) return op @@ -729,11 +728,11 @@ def fixup_elementwise_with_scalars(op, arch, nng): ifm2_tensor.shape = full_shape(len(ifm_tensor.shape), ifm2_tensor.shape, 1) elif diff < 0: ifm_tensor.shape = full_shape(len(ifm2_tensor.shape), ifm_tensor.shape, 1) - elif ifm_tensor.shape == [] and ifm_tensor.quant_values is None: + elif ifm_tensor.shape == [] and ifm_tensor.values is None: # IFM is marked as a scalar, but is a result of an operation; change it to a shape of size 1 ifm_tensor.shape = len(ifm2_tensor.shape) * [1] ifm_tensor.storage_shape = ifm_tensor.shape - elif ifm2_tensor.shape == [] and ifm2_tensor.quant_values is None: + elif ifm2_tensor.shape == [] and ifm2_tensor.values is None: # IFM2 is marked as a scalar, but is a result of an operation; change it to a shape of size 1 ifm2_tensor.shape = len(ifm_tensor.shape) * [1] ifm2_tensor.storage_shape = ifm2_tensor.shape @@ -811,7 +810,7 @@ def convert_mul_max_to_abs_or_lrelu(op, arch, nng): # to produce bit exact results, the alpha is not enough; # save additional scaling info in attr "alpha_scale", to be used as input # to the LUT construction - alpha_scalar = const_tens.quant_values - const_tens.quantization.zero_point + alpha_scalar = const_tens.values - const_tens.quantization.zero_point mul_ifm_scale = np.double(ifm.quantization.scale_f32) mul_ifm2_scale = np.double(const_tens.quantization.scale_f32) mul_ofm_scale = np.double(mul_ofm.quantization.scale_f32) @@ -912,7 +911,7 @@ def convert_lrelu_to_mul_max(op, arch): alpha_tens = create_const_tensor( op.name + "_alpha_scalar", [], ifm.dtype, [scalar], np.float32, quantization=quantization ) - alpha_tens.quant_values = np.array([1]) + alpha_tens.values = np.array([1]) mul_alpha.add_input_tensor(alpha_tens) fm_alpha = ofm.clone(op.name + "_alpha", set_unique=True) mul_alpha.set_output_tensor(fm_alpha) @@ -1209,7 +1208,7 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng): purpose=TensorPurpose.Weights, quantization=quantization, ) - weight_tens.quant_values = weights + weight_tens.values = weights op.type = Op.DepthwiseConv2DBias op.inputs = [] op.add_input_tensor(ifm) @@ -1331,7 +1330,6 @@ def fixup_bias_tensors(op, arch, nng): nr_biases = op.inputs[1].shape[-1] bias_values = [0] * nr_biases bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], DataType.int32, bias_values) - bias_tensor.quant_values = bias_tensor.values op.set_input_tensor(bias_tensor, op.type.info.indices.biases[0]) return op @@ -1409,13 +1407,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): quant = QuantizationParameters() quant.zero_point = 0 bias_term_tens = create_const_tensor( - op.name + "_bias", - [1, 1, 1, 1], - DataType.int16, - [bias_term], - np.int16, - quantization=quant, - quant_value_dtype=np.int16, + op.name + "_bias", [1, 1, 1, 1], DataType.int16, [bias_term], np.int16, quantization=quant, ) add_op.add_input_tensor(bias_term_tens) add_op.set_output_tensor(op.ofm) @@ -1514,7 +1506,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): ), 1, ) - op.weights.quant_values = np.reshape(op.inputs[1].quant_values, weight_shape) + op.weights.values = np.reshape(op.inputs[1].values, weight_shape) # Add None bias tensor op.inputs.append(None) diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py index 30bf32af..fbee7930 100644 --- a/ethosu/vela/tflite_reader.py +++ b/ethosu/vela/tflite_reader.py @@ -107,9 +107,6 @@ class TFLiteSubgraph: tens.values = np.array(buf.view(np_dtype)) else: tens.values = np.array(buf.view(np_dtype).reshape(shape)) - if tens.quantization is not None: - tens.quant_values = tens.values - tens.values = tens.quantization.dequantize(tens.quant_values) return tens def parse_operator(self, op_index, op_data): diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index fd3bf421..e6dd85b5 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -243,9 +243,7 @@ class TFLiteSerialiser: def serialise_tensor(self, tens): builder = self.builder tens_shape = tens.shape - values = tens.quant_values - if values is None: - values = tens.values + values = tens.values if values is None: values = np.empty(shape=(0), dtype=np.uint8) diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py index e51ead1d..364d9a63 100644 --- a/ethosu/vela/tosa_reader.py +++ b/ethosu/vela/tosa_reader.py @@ -192,7 +192,6 @@ class TosaSubgraph: fname = decode_str(tens_data.NpyFilename()) tens.values = np.load(os.path.join(file_path, fname)) assert list(tens.values.shape) == tens.shape - tens.quant_values = tens.values except (struct.error, TypeError, RuntimeError) as e: print(f'Error: Invalid npy file. Got "{e}" ') sys.exit(1) diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index 7e33e93b..65361436 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -100,7 +100,7 @@ class CompressedWeightCache: def create_weight_compression_config(weight_tens, npu_block_type, ofm_block_depth, ofm_depth_step, dilation): # Note: for an ofm block only its depth is used in weight compression. # And block depth > ofm depth gives same result as block depth == ofm depth - block_depth = min(ofm_block_depth, weight_tens.quant_values.shape[-1]) + block_depth = min(ofm_block_depth, weight_tens.values.shape[-1]) return WeightCompressionConfig(npu_block_type, block_depth, ofm_depth_step, dilation, weight_tens.value_id) @@ -214,7 +214,7 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling): # the operator should only have a single output assert len(tens.consumer_list[0].outputs) == 1 - biases = tens.quant_values + biases = tens.values first_consumer_op = tens.consumer_list[0] ifm_dtype = first_consumer_op.inputs[0].dtype @@ -318,7 +318,7 @@ def encode_weight_and_scale_tensor( assert weight_tens.quantization.zero_point is not None # Early zero-point correction - quant_buf = weight_tens.quant_values.astype(np.int16) + quant_buf = weight_tens.values.astype(np.int16) # the zero point can be either a native or numpy type if isinstance(weight_tens.quantization.zero_point, (int, float)): zero_point = np.int16(weight_tens.quantization.zero_point) @@ -363,7 +363,7 @@ def encode_weight_and_scale_tensor( scale_tens.element_size_bytes = 10 # Slice the weight stream up depth-ways into bricks and compress - full_ofm_depth = weight_tens.quant_values.shape[-1] + full_ofm_depth = weight_tens.values.shape[-1] ofm_block_depth = block_config.ofm_block.depth weight_range_index = 0 |