author    Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-06-28 07:41:58 +0200
committer Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-07-08 10:57:25 +0200
commit    8f1f9aaa58175b17cd2e505bfcdb0e40c955ea72 (patch)
tree      0174f8ef15007f5e220cfc4d283046451282102e /ethosu/vela/weight_compressor.py
parent    6f4955aa7097b123bbf31aae4654547bb3e3c68c (diff)
download  ethos-u-vela-8f1f9aaa58175b17cd2e505bfcdb0e40c955ea72.tar.gz
MLBEDSW-4838 Added basic TOSA support.
Added basic TOSA support, enabling Vela to read and compile a .tosa file
corresponding to CONV2D + Rescale + Clamp, and to write it to an optimized
.tflite file. The optimized .tflite file will, in this case, hold a command
stream where the Rescale and Clamp have been fused into the CONV2D. The
optimized .tflite file is not output from Vela.

- Added support to read a .tosa file into Vela's internal structure.
  - Added tosa_reader.py, tosa_mapper.py and helper files stored under tosa/
  - Support for this is limited to ~10 ops
- Added reader_util.py for functions common to TOSA and TFLite
- Added tosa_graph_optimiser.py
  - Added support to fuse Rescale into convolution
  - Modified handling of padding
- Added support to fuse Clamp into the previous op
- Added graph_optimiser_util.py
  - Moved functions common to TOSA/TFLite graph optimization to this file
- Renamed graph_optimiser.py to tflite_graph_optimiser.py
- Added separate tosa_supported_operators.py
- Added supported_operator_util.py for functions common to TOSA/TFLite

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: Ic3c540504ec8c5eb4771397fdc6882050ecf33ab
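As background for the fused scaling in the diff below, here is a hedged sketch of the fixed-point rescale that a TOSA Rescale performs and that this patch folds into the CONV2D command stream. The helper name rescale_sketch is hypothetical, not a Vela or TOSA API, and the round-to-nearest convention shown is one common choice rather than a confirmed hardware detail.

    def rescale_sketch(acc, multiplier, shift):
        # out ~= acc * multiplier * 2**(-shift), done in integer arithmetic
        rounding = 1 << (shift - 1)  # round to nearest before shifting down
        return (acc * multiplier + rounding) >> shift

    # 16384 * 2**-14 == 1.0, so the accumulator passes through unchanged
    assert rescale_sketch(123, multiplier=16384, shift=14) == 123

Fusing the Rescale then amounts to attaching its (multiplier, shift) pair to the convolution as explicit scaling, so the weight compressor can use it verbatim instead of deriving a pair from floating-point scales.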
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r--  ethosu/vela/weight_compressor.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 4ba3dee3..7e33e93b 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -203,7 +203,7 @@ def core_deinterleave(hwio, core, ncores):
     return ohwi[core : ohwi.shape[0] : ncores]
 
 
-def _prepare_scale_and_bias(arch, tens, rescale_for_faf):
+def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
     assert tens.purpose in [TensorPurpose.FeatureMap, TensorPurpose.FSBias]
     assert tens.format == TensorFormat.NHWC
     # the connected operator should expect a bias input unless it is a FullyConnected
@@ -260,11 +260,15 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf):
     else:
         raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
 
-    # quantise all of the weight scales into (scale_factor, shift)
-    if ifm_dtype == DataType.int16:
-        quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
+    if explicit_scaling:
+        assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
+        quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
     else:
-        quantised_scales = [quantise_scale(scale) for scale in scales]
+        # quantise all of the weight scales into (scale_factor, shift)
+        if ifm_dtype == DataType.int16:
+            quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
+        else:
+            quantised_scales = [quantise_scale(scale) for scale in scales]
 
     # If only 1 quantised scale is used, repeat that value for the length of the biases
     if len(quantised_scales) == 1:
@@ -355,7 +359,7 @@ def encode_weight_and_scale_tensor(
 
     # Bias & scale
     if do_scales:
-        quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, rescale_for_faf)
+        quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, rescale_for_faf, op.explicit_scaling)
         scale_tens.element_size_bytes = 10
 
     # Slice the weight stream up depth-ways into bricks and compress
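To make the new control flow concrete, below is a minimal standalone sketch of the two paths in _prepare_scale_and_bias above. quantise_scale_sketch is a hypothetical stand-in for Vela's quantise_scale/reduced_quantise_scale, and the ExplicitScaling container with parallel shift/multiplier lists is assumed from how the diff consumes op.explicit_scaling; neither is taken from this patch verbatim.

    import math
    from collections import namedtuple

    # Assumed shape of op.explicit_scaling: parallel lists of shifts and multipliers
    ExplicitScaling = namedtuple("ExplicitScaling", ["per_channel", "shift", "multiplier"])

    def quantise_scale_sketch(scale):
        # Approximate scale ~= multiplier * 2**(-shift) with a Q0.31 multiplier
        significand, exponent = math.frexp(scale)  # scale = significand * 2**exponent
        multiplier = int(round(significand * (1 << 31)))
        return multiplier, 31 - exponent

    def prepare_scales(scales, explicit_scaling=None):
        if explicit_scaling:
            # Fused Rescale: take the (multiplier, shift) pairs verbatim
            assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
            return [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
        # Otherwise derive each (multiplier, shift) pair from the floating-point scale
        return [quantise_scale_sketch(s) for s in scales]

    print(prepare_scales([0.0123, 0.5]))  # computed path, one pair per scale
    print(prepare_scales([], ExplicitScaling(False, shift=[14], multiplier=[16384])))  # explicit path

The design point of the patch is visible in this shape: when a TOSA Rescale has been fused into the convolution, its integer scaling is authoritative and the float-derived quantisation is bypassed entirely.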