From 1c08afa0ed049edd486498e62bab94a4dc7924bc Mon Sep 17 00:00:00 2001 From: Rickard Bolin Date: Fri, 7 Jan 2022 14:22:52 +0000 Subject: MLBEDSW-5534: Enet_640_640_int8 output diff The output diff is caused by not including the kernel dilation when calculating the bottom padding to be used on the last h_stripe. This only shows up when using dedicated_sram since shared_sram does not split into multiple h_stripes and thus uses the padding specified by the skirt instead. Signed-off-by: Rickard Bolin Change-Id: I7f643748b153004d65be2124c0ac6c9d21cd803f --- ethosu/vela/high_level_command_stream.py | 6 ++++-- ethosu/vela/high_level_command_stream_generator.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py index cf31aa5f..7e60221d 100644 --- a/ethosu/vela/high_level_command_stream.py +++ b/ethosu/vela/high_level_command_stream.py @@ -40,9 +40,9 @@ class Box: ifm_shape: Shape4D, npu_block_type: NpuBlockType, concat_offsets: List[int], + k_dilated_height: int, split_offset: Shape4D = None, split_shape: Shape4D = None, - k_height: int = 1, upscaling_factor: int = 1, ): new_start_coord = list(self.start_coord) @@ -105,7 +105,9 @@ class Box: pad_bottom = original_end_coord[-3] - (ifm_shape.height * upscaling_factor) else: k_start = new_start_coord[-3] - pad_top - pad_bottom = max(0, k_start + total_stride + k_height - (ifm_shape.height * upscaling_factor)) + pad_bottom = max( + 0, k_start + total_stride + k_dilated_height - (ifm_shape.height * upscaling_factor) + ) # Adjust for upscaling new_start_coord[-3] = max(new_start_coord[-3] // upscaling_factor, 0) diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index 3d0a1e58..f0d74098 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -82,7 +82,7 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): elif sched_op.op_type == Op.ResizeBilinear: upscaling = round_up_divide(ofm_shape.height, ifm.shape.height) - # Get Kernel height + # Get kernel height and height dilation k_height = 1 if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum): if parent_op is not None: @@ -91,6 +91,11 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): if uncomp_weight_tensor is not None: k_height = uncomp_weight_tensor.shape[0] + k_height_dilation = parent_op.attrs.get("dilation", (_, 1, _, _))[-3] + + # Calculate dilated kernel height + k_dilated_height = k_height_dilation * (k_height - 1) + 1 + # Define Start and End coordinates for the OFM ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0]) ofm_end = ofm_shape @@ -150,9 +155,9 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): ifm.shape, npu_block_type, write_offset.as_list(), + k_dilated_height, read_offsets[0], read_shapes[0], - k_height, upscaling, ) @@ -164,9 +169,9 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): ifm2.shape, npu_block_type, write_offset.as_list(), + k_dilated_height, read_offsets[1], read_shapes[1], - k_height, upscaling, ) -- cgit v1.2.1