aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Fredrik Svedberg <fredrik.svedberg@arm.com> 2022-10-11 21:50:51 +0200
committer: Fredrik Svedberg <fredrik.svedberg@arm.com> 2022-10-12 11:22:17 +0000
commit: b81e1bb92be8ea5b29625cf2f361e9160286b16a (patch)
tree: 0963851b6d449224f67cce10ba510830db7b4172
parent: 3e3faa90def7c2c4aaaf71543726c881f05f53b5 (diff)
download: ethos-u-vela-b81e1bb92be8ea5b29625cf2f361e9160286b16a.tar.gz
MLBEDSW-6971 Fix output diff when cascading elementwise operators
Fixed output diff when cascading elementwise operators with reversed operand order.

Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Change-Id: Iac2e28cfb53037b929459af213f4fa7715b3e6de
-rw-r--r-- ethosu/vela/cascade_builder.py | 5
-rw-r--r-- ethosu/vela/high_level_command_stream.py | 2
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py | 3
-rw-r--r-- ethosu/vela/high_level_command_to_npu_op.py | 5
-rw-r--r-- ethosu/vela/scheduler.py | 2
-rw-r--r-- ethosu/vela/softmax.py | 2
6 files changed, 15 insertions, 4 deletions
diff --git a/ethosu/vela/cascade_builder.py b/ethosu/vela/cascade_builder.py
index b4a4f876..ebe2f133 100644
--- a/ethosu/vela/cascade_builder.py
+++ b/ethosu/vela/cascade_builder.py
@@ -175,11 +175,12 @@ class CascadeBuilder:
ifm = sched_op.parent_op.ifm
ifm2 = sched_op.parent_op.ifm2
- # Cascading elementwise operations with reverse operand order is not handled
if sched_op.parent_op.type.is_binary_elementwise_op() and ifm and ifm2:
# We cannot rule out cascadability if at least one IFM is constant
+ ifm_const = ifm.ops != [] and ifm.ops[0].type == Op.Const
ifm2_const = ifm2.ops != [] and ifm2.ops[0].type == Op.Const
- return ifm_ifm2_correct_order(ifm.shape, ifm2.shape) and ifm2_const
+ correct_order = ifm_ifm2_correct_order(ifm.shape, ifm2.shape)
+ return (ifm_const and (ifm.shape == ifm2.shape or not correct_order)) or (ifm2_const and correct_order)
else:
# Either one IFM is not variable or it is not a binary elementwise op - we cannot rule out cascadability
return True
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 4a41edd0..bfe5bce1 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -210,6 +210,7 @@ class NpuStripe(Command):
ifm2_box=None,
pad_top=0,
pad_bottom=0,
+ reversed_operands=False,
):
self.ps = ps
self.block_config = block_config
@@ -226,6 +227,7 @@ class NpuStripe(Command):
self.weight_box = weight_box
self.pad_top = pad_top
self.pad_bottom = pad_bottom
+ self.reversed_operands = reversed_operands
for i in range(len(self.ofm_box.end_coord)):
assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0][i]
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 7e13b62f..e71fb6ea 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -74,6 +74,8 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
_,
_,
) = parent_op.get_ifm_ifm2_weights_biases_ofm()
+ if sched_op.reversed_operands:
+ ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor
ifm = sched_op.ifm
ifm2 = sched_op.ifm2
ofm_shape = sched_op.ofm.shape
@@ -236,4 +238,5 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
ifm2_box=ifm2_box,
pad_top=pad_top,
pad_bottom=pad_bottom,
+ reversed_operands=sched_op.reversed_operands,
)
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 974d980c..202917bd 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -555,7 +555,10 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu
if elemwise_op not in UNARY_ELEMWISE_OPS:
ifm_shape = [] if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0].as_list()
ifm2_shape = [] if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1].as_list()
- if not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
+ if cmd.reversed_operands:
+ assert ifm_ifm2_correct_order(ifm_shape, ifm2_shape)
+ npu_op.reversed_operands = True
+ elif not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
# The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 9dca63a8..208b121e 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -182,6 +182,7 @@ class SchedulerOperation:
self.activation = ps.primary_op.activation
self.kernel = ps.primary_op.kernel
self.resampling_mode = ps.primary_op.ifm_resampling_mode
+ self.reversed_operands = False
self.uses_scalar = ps.primary_op.ifm2 is not None and (
ps.primary_op.ifm.shape == [] or ps.primary_op.ifm2.shape == []
)
@@ -239,6 +240,7 @@ class SchedulerOperation:
# The non-broadcasted IFM should be the primary input
or (ifm1.shape != ofm.shape and ifm2.shape == ofm.shape)
):
+ self.reversed_operands = True
self.ifm, self.ifm2 = self.ifm2, self.ifm
self.parent_ps.ifm_shapes = self.parent_ps.ifm_shapes[::-1]
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 1655427e..a0fd19ca 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -353,8 +353,8 @@ class SoftMax:
)
add_op = create_add(
f"{self.op.name}_add{pass_number}",
- f0_one_const,
shifted_sum_minus_one,
+ f0_one_const,
one_scale_quant,
activation,
)