author     Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-06-28 07:41:58 +0200
committer  Patrik Gustavsson <patrik.gustavsson@arm.com>  2021-07-08 10:57:25 +0200
commit     8f1f9aaa58175b17cd2e505bfcdb0e40c955ea72 (patch)
tree       0174f8ef15007f5e220cfc4d283046451282102e /ethosu/vela/weight_compressor.py
parent     6f4955aa7097b123bbf31aae4654547bb3e3c68c (diff)
MLBEDSW-4838 Added basic TOSA support.
Added basic TOSA support, enabling Vela to
read and compile a .tosa file corresponding to
CONV2D + Rescale + Clamp into the form of an
optimized .tflite file.
In this case the optimized .tflite file holds
a command stream in which the Rescale and Clamp
have been fused into the CONV2D. Note that the
optimized .tflite file itself is not yet written
out by Vela.
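To make the fusion concrete, the following is a minimal,
self-contained sketch of folding a Rescale's (multiplier, shift)
scaling and a Clamp's bounds into a preceding convolution. The Op
class and fuse helper are hypothetical stand-ins, not Vela's actual
graph API:

from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class Op:
    # Hypothetical stand-in for a graph operator (not Vela's Operation class)
    kind: str
    explicit_scaling: Optional[Tuple[int, int]] = None   # (multiplier, shift)
    activation_bounds: Optional[Tuple[int, int]] = None  # (min, max)

def fuse(conv: Op, rescale: Op, clamp: Op) -> Op:
    # Fold the Rescale's output scaling and the Clamp's bounds into the
    # convolution, so a single hardware op covers CONV2D + Rescale + Clamp.
    conv.explicit_scaling = rescale.explicit_scaling
    conv.activation_bounds = clamp.activation_bounds
    return conv

fused = fuse(
    Op("CONV2D"),
    Op("RESCALE", explicit_scaling=(1073741824, 30)),
    Op("CLAMP", activation_bounds=(0, 127)),
)
print(fused.explicit_scaling, fused.activation_bounds)  # (1073741824, 30) (0, 127)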
- Added support to read a .tosa file into Vela's
  internal structure:
  - Added tosa_reader.py, tosa_mapper.py and
    helper files stored under tosa/
  - Support is currently limited to ~10 ops
- Added reader_util.py for functions common to
  TOSA and TFLite
- Added tosa_graph_optimiser.py:
  - Added support to fuse Rescale into convolution
  - Modified the handling of padding
  - Added support to fuse Clamp into the previous op
    (see the sketch after this list)
- Added graph_optimiser_util.py:
  - Moved functions common to TOSA/TFLite graph
    optimization into this file
- Renamed graph_optimiser.py to tflite_graph_optimiser.py
- Added separate tosa_supported_operators.py
- Added supported_operator_util.py for functions
  common to TOSA and TFLite
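As an illustration of the Clamp fusion listed above, here is a
minimal sketch of a graph-optimiser rewrite. The (op, arch, nng)
signature follows the pattern of Vela's other rewrite functions, but
every attribute and helper name below is an illustrative assumption,
not the actual tosa_graph_optimiser.py code:

def fuse_clamp(op, arch, nng):
    # Illustrative sketch only; attribute names are assumptions.
    if op.type == "CLAMP" and len(op.inputs[0].ops) == 1:
        producer = op.inputs[0].ops[0]  # the op writing the Clamp's input
        # Record the clamp bounds as the producer's activation range ...
        producer.activation_bounds = (op.attrs["min"], op.attrs["max"])
        # ... and bypass the Clamp: the producer now writes the Clamp's output.
        producer.outputs = op.outputs
        for tens in op.outputs:
            tens.ops = [producer]
    return op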
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: Ic3c540504ec8c5eb4771397fdc6882050ecf33ab
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r--  ethosu/vela/weight_compressor.py | 16
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 4ba3dee3..7e33e93b 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -203,7 +203,7 @@ def core_deinterleave(hwio, core, ncores):
     return ohwi[core : ohwi.shape[0] : ncores]
 
 
-def _prepare_scale_and_bias(arch, tens, rescale_for_faf):
+def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
     assert tens.purpose in [TensorPurpose.FeatureMap, TensorPurpose.FSBias]
     assert tens.format == TensorFormat.NHWC
     # the connected operator should expect a bias input unless it is a FullyConnected
@@ -260,11 +260,15 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf):
     else:
         raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
 
-    # quantise all of the weight scales into (scale_factor, shift)
-    if ifm_dtype == DataType.int16:
-        quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
+    if explicit_scaling:
+        assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
+        quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
     else:
-        quantised_scales = [quantise_scale(scale) for scale in scales]
+        # quantise all of the weight scales into (scale_factor, shift)
+        if ifm_dtype == DataType.int16:
+            quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
+        else:
+            quantised_scales = [quantise_scale(scale) for scale in scales]
 
     # If only 1 quantised scale is used, repeat that value for the length of the biases
     if len(quantised_scales) == 1:
@@ -355,7 +359,7 @@ def encode_weight_and_scale_tensor(
 
     # Bias & scale
     if do_scales:
-        quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, rescale_for_faf)
+        quantised_scales, biases = _prepare_scale_and_bias(arch, scale_tens, rescale_for_faf, op.explicit_scaling)
         scale_tens.element_size_bytes = 10
 
     # Slice the weight stream up depth-ways into bricks and compress
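For context on what the new explicit_scaling branch replaces, below
is a simplified sketch of quantising a float scale into a
(multiplier, shift) pair, next to the explicit path that takes the
pairs directly from the TOSA Rescale operator. This quantise_scale is
a simplified reimplementation and ExplicitScaling a hypothetical
stand-in; neither is Vela's production code:

import math

def quantise_scale(scale: float):
    # Represent scale as (multiplier, shift) with scale ~= multiplier * 2**-shift
    # and multiplier a 31-bit integer. Simplified sketch; the production version
    # also handles rounding overflow and shift clamping.
    significand, exponent = math.frexp(scale)  # scale = significand * 2**exponent
    multiplier = round(significand * (1 << 31))
    shift = 31 - exponent
    return multiplier, shift

print(quantise_scale(0.00390625))  # (1073741824, 38): 2**30 * 2**-38 == 1/256

# The explicit_scaling branch skips the approximation entirely and uses the
# (multiplier, shift) pairs carried by the TOSA Rescale operator:
class ExplicitScaling:  # hypothetical stand-in for the real attribute's type
    def __init__(self, multiplier, shift):
        self.multiplier, self.shift = multiplier, shift

es = ExplicitScaling(multiplier=[1073741824], shift=[38])
quantised_scales = [(int(m), int(s)) for s, m in zip(es.shift, es.multiplier)]
print(quantised_scales)  # [(1073741824, 38)]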