author    Rickard Bolin <rickard.bolin@arm.com>  2023-05-08 12:23:34 +0000
committer Rickard Bolin <rickard.bolin@arm.com>  2023-05-15 07:40:53 +0000
commit    fd6f624870b446207b4436cda5bd93dd4ad577ff (patch)
tree      474912900797cfde4972a4936cb626e707f7c18f
parent    8e74b57ee087481548dca49fddd134516a7a65e6 (diff)
MLBEDSW-7428: Remove unused rescale_for_faf
Remove unused parameter rescale_for_faf.

Change-Id: Id388d307f3eb0d27bce813ab58e3c9a5f4ba89ae
Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
ethosu/vela/weight_compressor.py | 29 ++++++++++-------------------
1 file changed, 10 insertions(+), 19 deletions(-)
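The surviving code path computes the standard TensorFlow Lite effective rescale: input scale times per-channel weight scale, divided by output scale, in double precision. A minimal, self-contained sketch of that arithmetic using plain NumPy; the helper name effective_scales and the sample values are invented for illustration and are not part of the patch:

import numpy as np

def effective_scales(ifm_scale, weight_scales, ofm_scale):
    # Combine input, weight and output quantisation into one per-channel
    # multiplier, computed as doubles to match the TFLite reference behaviour
    # described in the comment inside _prepare_scale_and_bias.
    return [
        np.double(ifm_scale) * np.double(ws) / np.double(ofm_scale)
        for ws in weight_scales
    ]

print(effective_scales(0.5, [0.25, 0.125], 2.0))  # [0.0625, 0.03125]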
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index a258be41..e4779bf5 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -250,7 +250,7 @@ def _get_output_quantization(op):
return quant
-def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
+def _prepare_scale_and_bias(arch, tens, explicit_scaling):
assert tens.purpose in [TensorPurpose.FeatureMap, TensorPurpose.FSBias]
assert tens.format == TensorFormat.NHWC
# the connected operator should expect a bias input unless it is a FullyConnected
@@ -283,23 +283,14 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
# uses double during scaling calculations
# TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
# for FullyConnected operators
- if not rescale_for_faf:
- if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
- scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
- elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
- scales = [
- (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
- for weight_scale in weight_scales
- ]
- else:
- raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+ if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
+ scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+ elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
+ scales = [
+ (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale) for weight_scale in weight_scales
+ ]
else:
- if ifm_dtype == DataType.uint8:
- scales = [np.double(ifm_scale * weight_scale * 0x3000) for weight_scale in weight_scales]
- elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
- scales = [(np.double(ifm_scale * 0x3000) * np.double(weight_scale)) for weight_scale in weight_scales]
- else:
- raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+ raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
if explicit_scaling:
assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
@@ -326,7 +317,7 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
def encode_weight_and_scale_tensor(
- arch, op, weight_tens, scale_tens, kernel, block_config, depth_offsets, rescale_for_faf=False
+ arch, op, weight_tens, scale_tens, kernel, block_config, depth_offsets
) -> Tuple[Optional[NpuWeightTensor], Optional[NpuWeightTensor]]:
npu_block_type = op.type.npu_block_type
@@ -407,7 +398,7 @@ def encode_weight_and_scale_tensor(
# Bias & scale
if do_scales:
- quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, rescale_for_faf, op.explicit_scaling)
+ quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, op.explicit_scaling)
scale_tens.element_size_bytes = 10
# Slice the weight stream up depth-ways into bricks and compress
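For contrast, the branch this patch deletes folded a fixed 0x3000 factor into the scale and skipped the division by the output scale. A sketch of that removed arithmetic (the int8/int16 variant), again with an illustrative helper name and sample values:

import numpy as np

FAF_RESCALE = 0x3000  # fixed factor used by the removed rescale_for_faf branch

def faf_scales(ifm_scale, weight_scales):
    # Removed path: (ifm_scale * 0x3000) * weight_scale, with no ofm_scale
    # division, per the deleted lines in _prepare_scale_and_bias above.
    return [np.double(ifm_scale * FAF_RESCALE) * np.double(ws) for ws in weight_scales]

print(faf_scales(0.5, [0.25]))  # [1536.0]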