author     Fredrik Svedberg <fredrik.svedberg@arm.com>   2020-03-30 13:15:28 +0200
committer  Tim Hall <tim.hall@arm.com>                    2020-06-18 17:53:52 +0100
commit     d67c0aaccd91f4be3ea76f69fa063301ffc73aa3 (patch)
tree       a9ad54b32d18685347847fcc59db8374bbe36233 /ethosu/vela/weight_compressor.py
parent     fed918bfb26dc330a5f066ea5947bc5eb2db4651 (diff)
download   ethos-u-vela-d67c0aaccd91f4be3ea76f69fa063301ffc73aa3.tar.gz
MLBEDSW-819: make int16 changes
Enabled int16 quantization support to match the reference.

Change-Id: Ib369640241a9a491f2b0bc52d7f6cb025e30344b
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
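For context, the arithmetic this commit extends to int16 is the standard per-channel requantisation scale computed in calc_scales_and_pack_biases (see the diff below). A minimal runnable sketch, using illustrative values for ifm_scale, ofm_scale and weight_scales (in Vela these come from the tensors' quantization parameters):

import numpy as np

# Illustrative quantization parameters (hypothetical values).
ifm_scale = 0.05
ofm_scale = 0.1
weight_scales = [0.002, 0.003]

# No fused activation function: s = (s_ifm * s_w) / s_ofm per output channel.
scales = [
    (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
    for weight_scale in weight_scales
]

# With a fused activation function the OFM scale is dropped and the
# product is rescaled by the constant 0x3000 instead.
scales_faf = [np.double(ifm_scale * 0x3000) * np.double(weight_scale) for weight_scale in weight_scales]

The int16 branch computes the same double-precision products as int8; what differs is how those scales are then quantised, sketched after the diff.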
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r--  ethosu/vela/weight_compressor.py  |  11
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 0b4ac696..92197248 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -25,7 +25,7 @@ import math
 import numpy as np
 from collections import namedtuple
 from .numeric_util import round_up
-from .scaling import quantise_scale
+from .scaling import quantise_scale, reduced_quantise_scale
 from .tensor import TensorPurpose, TensorSubPurpose, TensorFormat, TensorBlockTraversal
 from .operation import NpuBlockType
 from .architecture_features import Block
@@ -287,7 +287,7 @@ def calc_scales_and_pack_biases(tens, arch, oc_quantum, rescale_for_faf=False):
     if not rescale_for_faf:
         if ifm_dtype == DataType.uint8:
             scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
-        elif ifm_dtype == DataType.int8:
+        elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
             scales = [
                 (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
                 for weight_scale in weight_scales
@@ -297,13 +297,16 @@ def calc_scales_and_pack_biases(tens, arch, oc_quantum, rescale_for_faf=False):
     else:
         if ifm_dtype == DataType.uint8:
             scales = [np.double(ifm_scale * weight_scale * 0x3000) for weight_scale in weight_scales]
-        elif ifm_dtype == DataType.int8:
+        elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
             scales = [(np.double(ifm_scale * 0x3000) * np.double(weight_scale)) for weight_scale in weight_scales]
         else:
             assert False, str(ifm_dtype) + " not implemented"

     # quantise all of the weight scales into (scale_factor, shift)
-    quantised_scales = [quantise_scale(scale) for scale in scales]
+    if ifm_dtype == DataType.int16:
+        quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
+    else:
+        quantised_scales = [quantise_scale(scale) for scale in scales]

     for _, shift in quantised_scales:
         assert shift >= 16
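The new int16 branch quantises these scales with reduced_quantise_scale instead of quantise_scale. Below is a hedged sketch of the general multiplier/shift decomposition such functions perform; the *_sketch names are hypothetical, and the 16-bit narrowing is an assumption about what "reduced" means here, not Vela's actual implementation (see ethosu/vela/scaling.py for that):

import math

def quantise_scale_sketch(scale):
    # Express scale as multiplier * 2**-shift with a 31-bit multiplier.
    significand, exponent = math.frexp(scale)  # scale = significand * 2**exponent, 0.5 <= significand < 1
    multiplier = int(round(significand * (1 << 31)))
    shift = 31 - exponent
    return multiplier, shift

def reduced_quantise_scale_sketch(scale):
    # Assumption: the "reduced" variant narrows the multiplier to 16 bits,
    # rounding and adjusting the shift to compensate.
    multiplier, shift = quantise_scale_sketch(scale)
    reduced = min((multiplier + (1 << 15)) >> 16, (1 << 15) - 1)
    return reduced, shift - 16

print(quantise_scale_sketch(0.0015))          # (1649267442, 40): 1649267442 * 2**-40 ~= 0.0015
print(reduced_quantise_scale_sketch(0.0015))  # (25166, 24): coarser, but fits in 16 bits

Note that the assert shift >= 16 at the end of the hunk runs over quantised_scales produced by either path.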