From b81e1bb92be8ea5b29625cf2f361e9160286b16a Mon Sep 17 00:00:00 2001
From: Fredrik Svedberg
Date: Tue, 11 Oct 2022 21:50:51 +0200
Subject: MLBEDSW-6971 Fix output diff when cascading elementwise operators

Fixed output diff when cascading elementwise operators with reversed
operand order.

Signed-off-by: Fredrik Svedberg
Change-Id: Iac2e28cfb53037b929459af213f4fa7715b3e6de
---
 ethosu/vela/cascade_builder.py                     | 5 +++--
 ethosu/vela/high_level_command_stream.py           | 2 ++
 ethosu/vela/high_level_command_stream_generator.py | 3 +++
 ethosu/vela/high_level_command_to_npu_op.py        | 5 ++++-
 ethosu/vela/scheduler.py                           | 2 ++
 ethosu/vela/softmax.py                             | 2 +-
 6 files changed, 15 insertions(+), 4 deletions(-)
(limited to 'ethosu')

diff --git a/ethosu/vela/cascade_builder.py b/ethosu/vela/cascade_builder.py
index b4a4f876..ebe2f133 100644
--- a/ethosu/vela/cascade_builder.py
+++ b/ethosu/vela/cascade_builder.py
@@ -175,11 +175,12 @@ class CascadeBuilder:
         ifm = sched_op.parent_op.ifm
         ifm2 = sched_op.parent_op.ifm2
 
-        # Cascading elementwise operations with reverse operand order is not handled
         if sched_op.parent_op.type.is_binary_elementwise_op() and ifm and ifm2:
             # We cannot rule out cascadability if at least one IFM is constant
+            ifm_const = ifm.ops != [] and ifm.ops[0].type == Op.Const
             ifm2_const = ifm2.ops != [] and ifm2.ops[0].type == Op.Const
-            return ifm_ifm2_correct_order(ifm.shape, ifm2.shape) and ifm2_const
+            correct_order = ifm_ifm2_correct_order(ifm.shape, ifm2.shape)
+            return (ifm_const and (ifm.shape == ifm2.shape or not correct_order)) or (ifm2_const and correct_order)
         else:
             # Either one IFM is not variable or it is not a binary elementwise op - we cannot rule out cascadability
             return True

diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 4a41edd0..bfe5bce1 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -210,6 +210,7 @@ class NpuStripe(Command):
         ifm2_box=None,
         pad_top=0,
         pad_bottom=0,
+        reversed_operands=False,
     ):
         self.ps = ps
         self.block_config = block_config
@@ -226,6 +227,7 @@ class NpuStripe(Command):
         self.weight_box = weight_box
         self.pad_top = pad_top
         self.pad_bottom = pad_bottom
+        self.reversed_operands = reversed_operands
         for i in range(len(self.ofm_box.end_coord)):
             assert self.ofm_box.end_coord[i] <= ps.ofm_shapes[0][i]

diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 7e13b62f..e71fb6ea 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -74,6 +74,8 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
         _,
         _,
     ) = parent_op.get_ifm_ifm2_weights_biases_ofm()
+    if sched_op.reversed_operands:
+        ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor
     ifm = sched_op.ifm
     ifm2 = sched_op.ifm2
     ofm_shape = sched_op.ofm.shape
@@ -236,4 +238,5 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
                 ifm2_box=ifm2_box,
                 pad_top=pad_top,
                 pad_bottom=pad_bottom,
+                reversed_operands=sched_op.reversed_operands,
             )

diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 974d980c..202917bd 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -555,7 +555,10 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu
     if elemwise_op not in UNARY_ELEMWISE_OPS:
         ifm_shape = [] if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0].as_list()
         ifm2_shape = [] if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1].as_list()
-        if not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
+        if cmd.reversed_operands:
+            assert ifm_ifm2_correct_order(ifm_shape, ifm2_shape)
+            npu_op.reversed_operands = True
+        elif not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
             # The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
             cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
             cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box

diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 9dca63a8..208b121e 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -182,6 +182,7 @@ class SchedulerOperation:
         self.activation = ps.primary_op.activation
         self.kernel = ps.primary_op.kernel
         self.resampling_mode = ps.primary_op.ifm_resampling_mode
+        self.reversed_operands = False
         self.uses_scalar = ps.primary_op.ifm2 is not None and (
             ps.primary_op.ifm.shape == [] or ps.primary_op.ifm2.shape == []
         )
@@ -239,6 +240,7 @@ class SchedulerOperation:
             # The non-broadcasted IFM should be the primary input
             or (ifm1.shape != ofm.shape and ifm2.shape == ofm.shape)
         ):
+            self.reversed_operands = True
             self.ifm, self.ifm2 = self.ifm2, self.ifm
             self.parent_ps.ifm_shapes = self.parent_ps.ifm_shapes[::-1]

diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 1655427e..a0fd19ca 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -353,8 +353,8 @@ class SoftMax:
         )
         add_op = create_add(
             f"{self.op.name}_add{pass_number}",
-            f0_one_const,
             shifted_sum_minus_one,
+            f0_one_const,
             one_scale_quant,
             activation,
         )
--
cgit v1.2.1
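
For readers following the change, the sketch below restates the new cascadability rule from cascade_builder.py as a standalone Python function. It is illustrative only and not part of the patch: ifm_ifm2_correct_order() is stubbed with an assumed element-count comparison, and the shapes and constant flags in the demo are made up.

# Illustrative sketch only (not part of the patch above): the cascadability rule
# encoded by the new return statement in CascadeBuilder, with an assumed
# stand-in for ifm_ifm2_correct_order() based on element counts.
from math import prod
from typing import List


def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
    # Assumption: operand order is "correct" when the first input is at least as
    # large as the second, i.e. any scalar/broadcast input sits in ifm2.
    return prod(ifm_shape) >= prod(ifm2_shape)


def elementwise_cascadable(
    ifm_shape: List[int], ifm2_shape: List[int], ifm_is_const: bool, ifm2_is_const: bool
) -> bool:
    # Mirrors the patched return expression in cascade_builder.py: cascading is
    # not ruled out when either the second input is constant and the operand
    # order is already correct, or the first input is constant and the operands
    # have equal shapes or will be reversed.
    correct_order = ifm_ifm2_correct_order(ifm_shape, ifm2_shape)
    return (ifm_is_const and (ifm_shape == ifm2_shape or not correct_order)) or (
        ifm2_is_const and correct_order
    )


if __name__ == "__main__":
    fm = [1, 8, 8, 16]  # made-up feature map shape
    print(elementwise_cascadable(fm, [], False, True))    # True: constant scalar already in ifm2
    print(elementwise_cascadable([], fm, True, False))    # True: reversed operand order, now handled
    print(elementwise_cascadable(fm, fm, False, False))   # False: no constant input

The rest of the patch threads the decision through: SchedulerOperation records reversed_operands when it swaps its IFMs, the high-level command stream carries the flag on each NpuStripe, and create_npu_elementwise_op sets it on the NPU operation instead of swapping the tensors again.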