diff options
author | Rickard Bolin <rickard.bolin@arm.com> | 2022-06-09 13:07:17 +0000 |
---|---|---|
committer | Rickard Bolin <rickard.bolin@arm.com> | 2022-09-12 08:06:00 +0000 |
commit | 9ae34556663d09cc3dff19e53e68b57c8c940565 (patch) | |
tree | 51088224df14f66bd02afec67b933bd2ab192efd /ethosu/vela/high_level_command_to_npu_op.py | |
parent | 059166304f9ef47f0b916c1325700ed826f25581 (diff) | |
download | ethos-u-vela-9ae34556663d09cc3dff19e53e68b57c8c940565.tar.gz |
MLBEDSW-6613: Implement tile padding
Implement new padding mode which pads two edges of the IFM with the
current values of those edges
Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
Change-Id: I8523e0cabdac80b48710703859003e33050cc150
Diffstat (limited to 'ethosu/vela/high_level_command_to_npu_op.py')
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 91 |
1 files changed, 86 insertions, 5 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 2ce150fc..18919431 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -37,6 +37,7 @@ from .api import NpuElementWiseOperation from .api import NpuFeatureMap from .api import NpuLayout from .api import NpuOperation +from .api import NpuOperationType from .api import NpuPadding from .api import NpuPoolingOp from .api import NpuPoolingOperation @@ -59,6 +60,7 @@ from .numeric_util import round_up from .operation import NpuBlockType from .operation import Op from .operation import Operation +from .operation import Padding from .register_command_stream_generator import generate_command_stream from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM from .register_command_stream_util import to_npu_kernel @@ -148,7 +150,7 @@ def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode: return rounding_mode -def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding: +def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding: if primary_op.type.npu_block_type == NpuBlockType.VectorProduct: return NpuPadding(top=0, left=0, bottom=0, right=0) top, left, bottom, right = primary_op.attrs["explicit_padding"] @@ -174,9 +176,91 @@ def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding: left = 0 if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < box_end_coord_max: right = 0 + + # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero. + if primary_op.attrs.get("padding", None) == Padding.TILE: + assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding" + assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution" + assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses" + npu_op.ifm.tiles = modify_tile_addresses_for_padding( + npu_op.ifm.tiles, + primary_op.attrs.get("explicit_padding", None), + channels=cmd.ps.ifm_shapes[0].depth, + dtype=cmd.ifm_tensor.dtype, + ) + top, left, bottom, right = 0, 0, 0, 0 return NpuPadding(top=top, left=left, bottom=bottom, right=right) +def modify_tile_addresses_for_padding( + tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType +) -> NpuTileBox: + # Addresses are 16-bytes aligned when using the NHCWB16 format, which is required to utilize tiling + # Calculate the offset to top right, bottom left and bottom right element in the IFM (top left offset is 0) + """ + Example: 4x4x1 IFM + | a b c d | <-- Offset to TR ('d') is (w0-1) = 3 + | e f g h | + | i j k l | + | m n o p | <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15 + """ + h0, h1, w0, addresses = tile_box + elem_size = 2 if dtype == DataType.int16 else 1 + tr_offset = (w0 - 1) * 16 * elem_size + bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size + br_offset = tr_offset + bl_offset + + # Explicit padding order: (Top, Left, Bottom, Right) + if padding_direction == (1, 1, 0, 0): + # Pad top left corner + """ + | a a b | + | a b | -> | a a b | + | c d | | c c d | + """ + addresses = [addresses[0]] * 4 + h0, h1, w0 = 1, 1, 1 + + elif padding_direction == (1, 0, 0, 1): + # Pad top right corner + """ + | a b b | + | a b | -> | a b b | + | c d | | c d d | + """ + addresses = [addresses[0], addresses[0] + tr_offset, addresses[0], addresses[0] + tr_offset] + h0, h1, w0 = 1, 1, w0 + + elif padding_direction == (0, 1, 1, 0): + # Pad bottom left corner + """ + | a b | | a a b | + | c d | -> | c c d | + | c c d | + """ + addresses = [addresses[0], addresses[0], addresses[0] + bl_offset, addresses[0] + bl_offset] + h0, h1, w0 = h0, h1, 1 + + elif padding_direction == (0, 0, 1, 1): + # Pad bottom right corner + """ + | a b | | a b b | + | c d | -> | c d d | + | c d d | + """ + addresses = [ + addresses[0], + addresses[0] + tr_offset, + addresses[0] + bl_offset, + addresses[0] + br_offset, + ] + # h0, h1, w0 = h0, h1, w0 + else: + assert 0, "Invalid padding direction for tile padding" + + return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses]) + + def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int: base_ptr_idx_map = { MemType.Permanent_NPU: BasePointerIndex.WeightTensor, @@ -277,9 +361,6 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer( box.start_coord, box.end_coord, op_shape4D ) - for idx, addr in enumerate(addresses): - if addr is None: - addresses[idx] = 0 fm.tiles = NpuTileBox( height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses] ) @@ -393,7 +474,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3]) if not op.type.is_elementwise_op(): - npu_op.padding = create_padding(cmd, op) + npu_op.padding = create_padding(cmd, op, npu_op) npu_op.kernel = to_npu_kernel(op.kernel) npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode] return npu_op |