author    Fredrik Svedberg <fredrik.svedberg@arm.com>  2020-08-25 11:31:47 +0200
committer Fredrik Svedberg <fredrik.svedberg@arm.com>  2020-08-27 16:39:29 +0200
commit    880e73543120648f08886365a45e8b2ce32d5ff1 (patch)
tree      420627fb8e7e5000f23f82c52ba6569f4a696813
parent    a41cd4de2af1e43b76a2a33d78eeb2d90a88b757 (diff)
download  ethos-u-vela-880e73543120648f08886365a45e8b2ce32d5ff1.tar.gz
[MLBEDSW-2846] Do not use NHCWB16 for reduce_sum int32
Added checks so that NHCWB16 is not used for ReduceSum int32, which makes
int8/uint8 softmax work. Also enabled the softmax graph rewrite by default
and fixed a saturation problem.

Change-Id: Ic01bd9ece7e5c3edb2900b7915cc747efe9e5760
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
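A minimal sketch of the new scheduler rule, using a simplified stand-in Op class rather than Vela's real Operation/Tensor types (the actual check is in the scheduler.py hunk below and also handles Reshape no-ops):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Op:
        type: str

    def can_use_nhcwb16(consumers: List[Optional[Op]], ofm_is_int32: bool) -> bool:
        # NHCWB16 is rejected if any consumer is unknown, or is a ReduceSum
        # reading an int32 tensor (the case this commit adds).
        for op in consumers:
            if op is None or (op.type == "ReduceSum" and ofm_is_int32):
                return False
        return True

    # An int32 tensor consumed by ReduceSum stays in NHWC; other consumers keep NHCWB16.
    assert not can_use_nhcwb16([Op("ReduceSum")], ofm_is_int32=True)
    assert can_use_nhcwb16([Op("MulAct")], ofm_is_int32=True)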
-rw-r--r--  ethosu/vela/architecture_features.py |  3
-rw-r--r--  ethosu/vela/scheduler.py             |  3
-rw-r--r--  ethosu/vela/softmax.py               | 24
-rw-r--r--  ethosu/vela/supported_operators.py   |  6
-rw-r--r--  ethosu/vela/test/testutil.py         |  1
-rw-r--r--  ethosu/vela/vela.py                  |  9
6 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 8b968a3e..fd0e5c06 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -183,7 +183,6 @@ Note the difference between ArchitectureFeatures and CompilerOptions
block_config_limit,
global_memory_clock_scale,
max_blockdep,
- softmax_support,
weight_estimation_scaling,
):
accelerator_config = accelerator_config.lower()
@@ -332,7 +331,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
self.generate_block_config_map(Block(ifm_block_max.width, ifm_block_max.height, 128))
# Setup supported operators and restriction checkers class
- self.supported_operators = SupportedOperators(softmax_support)
+ self.supported_operators = SupportedOperators()
# Returns available number of SHRAM banks depending on activation lookup table
# being used or not
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 9b492f01..41902d67 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -24,6 +24,7 @@ import numpy as np
from . import live_range
from . import npu_performance
from . import stats_writer
+from .data_type import DataType
from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_pass_list
from .nn_graph import CascadedPass
from .nn_graph import PassPlacement
@@ -963,7 +964,7 @@ class DynamicProgrammingScheduler:
use_NHCWB16 = True
rewrites = []
for op in output.consumer_list:
- if op is None:
+ if op is None or (op.type == "ReduceSum" and output.dtype == DataType.int32):
use_NHCWB16 = False
elif op.type == "Reshape":
# Detect no-op reshapes by comparing their full input and output tensor shapes.
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index eb97c792..7c23f472 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -391,7 +391,9 @@ class SoftMax:
F2_one = create_const_tensor(
"F2_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 29)], np.int32, quantization=no_scale_quant
)
- two = create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
+ four = create_const_tensor(
+ "four_const", [1, 1, 1, 1], DataType.int32, [4], np.int32, quantization=no_scale_quant
+ )
for i in range(3):
# PASS 13, 18, 23 - MUL
mul_op = Operation("MulAct", self.op.name + "_mul%d" % (13 + i * 5))
@@ -416,10 +418,10 @@ class SoftMax:
to_rescale.quantization = one_scale_quant.clone()
to_rescale.quantization.scale_f32 = 2.0
mul_op.set_output_tensor(to_rescale)
- # PASS 16, 21, 26 - SHL
- shl_op = Operation("SHL", self.op.name + "_shl%d" % (16 + i * 5))
+ # PASS 16, 21, 26 - MUL
+ shl_op = Operation("MulAct", self.op.name + "_mul%d" % (16 + i * 5))
shl_op.add_input_tensor(to_rescale)
- shl_op.add_input_tensor(two)
+ shl_op.add_input_tensor(four)
to_add = Tensor(reduce_sum_shape, DataType.int32, shl_op.name + "_0")
to_add.quantization = no_scale_quant
shl_op.set_output_tensor(to_add)
@@ -431,13 +433,15 @@ class SoftMax:
nr_x.quantization = one_scale_quant
add_op.set_output_tensor(nr_x)
- # PASS 28 - SHL
- shl28_op = Operation("SHL", self.op.name + "_shl28")
- shl28_op.add_input_tensor(nr_x)
- shl28_op.add_input_tensor(one)
- scale_factor = Tensor(reduce_sum_shape, DataType.int32, shl28_op.name + "_0")
+ # PASS 28 - Multiply
+ mul28_op = Operation("MulAct", self.op.name + "_mul28")
+ mul28_op.add_input_tensor(nr_x)
+ mul28_op.add_input_tensor(
+ create_const_tensor("two_const", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=no_scale_quant)
+ )
+ scale_factor = Tensor(reduce_sum_shape, DataType.int32, mul28_op.name + "_0")
scale_factor.quantization = one_scale_quant
- shl28_op.set_output_tensor(scale_factor)
+ mul28_op.set_output_tensor(scale_factor)
# PASS 29 - Multiply
mul_op = Operation("MulAct", self.op.name + "_mul29")
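The SHL-to-MUL changes above preserve the arithmetic: a left shift by k equals a multiply by 2**k (for values that do not overflow), so the shift-by-2 passes become a multiply by the new "four" constant and the shift-by-1 pass a multiply by 2, routing the work through the MUL path instead of SHL. A quick standalone check of that equivalence, illustrative only, using plain Python ints rather than NPU tensors:

    # Left shift by k equals multiply by 2**k, so replacing SHL with MulAct
    # keeps the pass arithmetic identical.
    for x in (1, 123, 1 << 20):
        assert (x << 2) == x * 4   # PASS 16, 21, 26: SHL by 2 -> MUL by four
        assert (x << 1) == x * 2   # PASS 28: SHL by 1 -> MUL by two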
diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py
index 567c05ca..f57cbee2 100644
--- a/ethosu/vela/supported_operators.py
+++ b/ethosu/vela/supported_operators.py
@@ -22,8 +22,7 @@ from .data_type import DataType
class SupportedOperators:
- def __init__(self, softmax_support):
- self.softmax_support = softmax_support
+ def __init__(self):
# Categorised lists of supported operators
self.npu_pre_ops = set(("QuantizedResizeBilinear", "SplitSliceRead",))
self.convolution_ops = set(("Conv2DBiasAct", "Conv2D", "QuantizedConv2D",))
@@ -393,9 +392,6 @@ class SupportedOperators:
def check_activation_ops(self, op):
if op.type == "Softmax":
- if not self.softmax_support:
- return False
-
ifm_tensor = op.inputs[0]
ofm_tensor = op.outputs[0]
diff --git a/ethosu/vela/test/testutil.py b/ethosu/vela/test/testutil.py
index 68866fc7..fb6ca591 100644
--- a/ethosu/vela/test/testutil.py
+++ b/ethosu/vela/test/testutil.py
@@ -37,7 +37,6 @@ def create_arch():
block_config_limit=None,
global_memory_clock_scale=1.0,
max_blockdep=0,
- softmax_support=True,
weight_estimation_scaling=1.0,
)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 19080926..91899c28 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -254,18 +254,12 @@ def main(args=None):
help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
)
parser.add_argument(
- "--softmax-support",
- type=ast.literal_eval,
- default=False,
- choices=[True, False],
- help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
- )
- parser.add_argument(
"--weight-estimation-scaling",
type=float,
default=1.0,
help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
)
+
args = parser.parse_args(args=args)
# Read configuration file
@@ -295,7 +289,6 @@ def main(args=None):
block_config_limit=args.block_config_limit,
global_memory_clock_scale=args.global_memory_clock_scale,
max_blockdep=args.max_block_dependency,
- softmax_support=args.softmax_support,
weight_estimation_scaling=args.weight_estimation_scaling,
)