diff options
author | Rickard Bolin <rickard.bolin@arm.com> | 2022-06-09 13:07:17 +0000 |
---|---|---|
committer | Rickard Bolin <rickard.bolin@arm.com> | 2022-09-12 08:06:00 +0000 |
commit | 9ae34556663d09cc3dff19e53e68b57c8c940565 (patch) | |
tree | 51088224df14f66bd02afec67b933bd2ab192efd | |
parent | 059166304f9ef47f0b916c1325700ed826f25581 (diff) | |
download | ethos-u-vela-9ae34556663d09cc3dff19e53e68b57c8c940565.tar.gz |
MLBEDSW-6613: Implement tile padding
Implement new padding mode which pads two edges of the IFM with the
current values of those edges
Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
Change-Id: I8523e0cabdac80b48710703859003e33050cc150
-rw-r--r-- | ethosu/vela/cascade_builder.py | 2 | ||||
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 91 | ||||
-rw-r--r-- | ethosu/vela/operation.py | 1 | ||||
-rw-r--r-- | ethosu/vela/tensor.py | 4 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 3 |
5 files changed, 94 insertions, 7 deletions
diff --git a/ethosu/vela/cascade_builder.py b/ethosu/vela/cascade_builder.py index 94c856f4..3c105374 100644 --- a/ethosu/vela/cascade_builder.py +++ b/ethosu/vela/cascade_builder.py @@ -21,6 +21,7 @@ from collections import namedtuple from .numeric_util import round_up from .operation import NpuBlockType from .operation import Op +from .operation import Padding from .shape4d import Shape4D non_cascadable_blocks = ( @@ -99,6 +100,7 @@ class CascadeBuilder: and sched_op.parent_op.read_offsets[1] is None and self.element_wise_cascading_conformity(sched_op) and not sched_op.parent_op.type.is_resize_op() + and sched_op.parent_op.attrs.get("padding", None) != Padding.TILE ) def _is_mergeable(self, sched_op) -> bool: diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 2ce150fc..18919431 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -37,6 +37,7 @@ from .api import NpuElementWiseOperation from .api import NpuFeatureMap from .api import NpuLayout from .api import NpuOperation +from .api import NpuOperationType from .api import NpuPadding from .api import NpuPoolingOp from .api import NpuPoolingOperation @@ -59,6 +60,7 @@ from .numeric_util import round_up from .operation import NpuBlockType from .operation import Op from .operation import Operation +from .operation import Padding from .register_command_stream_generator import generate_command_stream from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM from .register_command_stream_util import to_npu_kernel @@ -148,7 +150,7 @@ def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode: return rounding_mode -def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding: +def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding: if primary_op.type.npu_block_type == NpuBlockType.VectorProduct: return NpuPadding(top=0, left=0, bottom=0, right=0) top, left, bottom, right = primary_op.attrs["explicit_padding"] @@ -174,9 +176,91 @@ def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding: left = 0 if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < box_end_coord_max: right = 0 + + # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero. + if primary_op.attrs.get("padding", None) == Padding.TILE: + assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding" + assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution" + assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses" + npu_op.ifm.tiles = modify_tile_addresses_for_padding( + npu_op.ifm.tiles, + primary_op.attrs.get("explicit_padding", None), + channels=cmd.ps.ifm_shapes[0].depth, + dtype=cmd.ifm_tensor.dtype, + ) + top, left, bottom, right = 0, 0, 0, 0 return NpuPadding(top=top, left=left, bottom=bottom, right=right) +def modify_tile_addresses_for_padding( + tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType +) -> NpuTileBox: + # Addresses are 16-bytes aligned when using the NHCWB16 format, which is required to utilize tiling + # Calculate the offset to top right, bottom left and bottom right element in the IFM (top left offset is 0) + """ + Example: 4x4x1 IFM + | a b c d | <-- Offset to TR ('d') is (w0-1) = 3 + | e f g h | + | i j k l | + | m n o p | <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15 + """ + h0, h1, w0, addresses = tile_box + elem_size = 2 if dtype == DataType.int16 else 1 + tr_offset = (w0 - 1) * 16 * elem_size + bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size + br_offset = tr_offset + bl_offset + + # Explicit padding order: (Top, Left, Bottom, Right) + if padding_direction == (1, 1, 0, 0): + # Pad top left corner + """ + | a a b | + | a b | -> | a a b | + | c d | | c c d | + """ + addresses = [addresses[0]] * 4 + h0, h1, w0 = 1, 1, 1 + + elif padding_direction == (1, 0, 0, 1): + # Pad top right corner + """ + | a b b | + | a b | -> | a b b | + | c d | | c d d | + """ + addresses = [addresses[0], addresses[0] + tr_offset, addresses[0], addresses[0] + tr_offset] + h0, h1, w0 = 1, 1, w0 + + elif padding_direction == (0, 1, 1, 0): + # Pad bottom left corner + """ + | a b | | a a b | + | c d | -> | c c d | + | c c d | + """ + addresses = [addresses[0], addresses[0], addresses[0] + bl_offset, addresses[0] + bl_offset] + h0, h1, w0 = h0, h1, 1 + + elif padding_direction == (0, 0, 1, 1): + # Pad bottom right corner + """ + | a b | | a b b | + | c d | -> | c d d | + | c d d | + """ + addresses = [ + addresses[0], + addresses[0] + tr_offset, + addresses[0] + bl_offset, + addresses[0] + br_offset, + ] + # h0, h1, w0 = h0, h1, w0 + else: + assert 0, "Invalid padding direction for tile padding" + + return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses]) + + def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int: base_ptr_idx_map = { MemType.Permanent_NPU: BasePointerIndex.WeightTensor, @@ -277,9 +361,6 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer( box.start_coord, box.end_coord, op_shape4D ) - for idx, addr in enumerate(addresses): - if addr is None: - addresses[idx] = 0 fm.tiles = NpuTileBox( height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses] ) @@ -393,7 +474,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3]) if not op.type.is_elementwise_op(): - npu_op.padding = create_padding(cmd, op) + npu_op.padding = create_padding(cmd, op, npu_op) npu_op.kernel = to_npu_kernel(op.kernel) npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode] return npu_op diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py index 54e823a8..de68b1d7 100644 --- a/ethosu/vela/operation.py +++ b/ethosu/vela/operation.py @@ -394,6 +394,7 @@ class Padding(Enum): SAME = 0 VALID = 1 EXPLICIT = 2 # Padding is specified in a PAD operation (only used for NPU operations) + TILE = 3 # Uses hardware tiles to pad by 1 with edge values on two sides of the IFM specified in explicit_padding class ActivationFunction: diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index e9815845..65473b8d 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -600,7 +600,7 @@ class Tensor: 1, 1, 1, - [self.address_for_coordinate(start_coord, op_shape4D=op_shape4D), None, None, None], + [self.address_for_coordinate(start_coord, op_shape4D=op_shape4D), 0, 0, 0], ) if self.is_standard_fm: @@ -617,7 +617,7 @@ class Tensor: box_height0 = crossing_y - start_coord[1] box_width = crossing_x - start_coord[2] - addresses: List = [None] * 4 + addresses: List = [0] * 4 addresses[0] = self.address_for_coordinate(start_coord, op_shape4D=op_shape4D) if end_coord[2] > crossing_x: diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 0f199de0..611046ba 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -238,6 +238,9 @@ def calc_padding_and_skirt(padding_type, kernel, input_shape, explicit_padding): top, left, bottom, right = explicit_padding top_pad, bottom_pad = calc_explicit_padding(int(input_shape.height), int(s_y), int(k_h), int(top), int(bottom)) left_pad, right_pad = calc_explicit_padding(int(input_shape.width), int(s_x), int(k_w), int(left), int(right)) + elif padding_type == Padding.TILE: + # The values in the explicit padding only represent the "direction" in which to pad + top_pad, left_pad, bottom_pad, right_pad = explicit_padding else: raise UnsupportedFeatureError(f"Unsupported padding = {padding_type} for padding calculation") padding = (top_pad, left_pad, bottom_pad, right_pad) |