aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRickard Bolin <rickard.bolin@arm.com>2022-06-09 13:07:17 +0000
committerRickard Bolin <rickard.bolin@arm.com>2022-09-12 08:06:00 +0000
commit9ae34556663d09cc3dff19e53e68b57c8c940565 (patch)
tree51088224df14f66bd02afec67b933bd2ab192efd
parent059166304f9ef47f0b916c1325700ed826f25581 (diff)
downloadethos-u-vela-9ae34556663d09cc3dff19e53e68b57c8c940565.tar.gz
MLBEDSW-6613: Implement tile padding
Implement a new padding mode which pads two edges of the IFM with the current values of those edges. Signed-off-by: Rickard Bolin <rickard.bolin@arm.com> Change-Id: I8523e0cabdac80b48710703859003e33050cc150
-rw-r--r--ethosu/vela/cascade_builder.py2
-rw-r--r--ethosu/vela/high_level_command_to_npu_op.py91
-rw-r--r--ethosu/vela/operation.py1
-rw-r--r--ethosu/vela/tensor.py4
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py3
5 files changed, 94 insertions, 7 deletions
diff --git a/ethosu/vela/cascade_builder.py b/ethosu/vela/cascade_builder.py
index 94c856f4..3c105374 100644
--- a/ethosu/vela/cascade_builder.py
+++ b/ethosu/vela/cascade_builder.py
@@ -21,6 +21,7 @@ from collections import namedtuple
from .numeric_util import round_up
from .operation import NpuBlockType
from .operation import Op
+from .operation import Padding
from .shape4d import Shape4D
non_cascadable_blocks = (
@@ -99,6 +100,7 @@ class CascadeBuilder:
and sched_op.parent_op.read_offsets[1] is None
and self.element_wise_cascading_conformity(sched_op)
and not sched_op.parent_op.type.is_resize_op()
+ and sched_op.parent_op.attrs.get("padding", None) != Padding.TILE
)
def _is_mergeable(self, sched_op) -> bool:
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 2ce150fc..18919431 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -37,6 +37,7 @@ from .api import NpuElementWiseOperation
from .api import NpuFeatureMap
from .api import NpuLayout
from .api import NpuOperation
+from .api import NpuOperationType
from .api import NpuPadding
from .api import NpuPoolingOp
from .api import NpuPoolingOperation
@@ -59,6 +60,7 @@ from .numeric_util import round_up
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
+from .operation import Padding
from .register_command_stream_generator import generate_command_stream
from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from .register_command_stream_util import to_npu_kernel
@@ -148,7 +150,7 @@ def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
return rounding_mode
-def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding:
+def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding:
if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
return NpuPadding(top=0, left=0, bottom=0, right=0)
top, left, bottom, right = primary_op.attrs["explicit_padding"]
@@ -174,9 +176,91 @@ def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding:
left = 0
if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < box_end_coord_max:
right = 0
+
+ # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero.
+ if primary_op.attrs.get("padding", None) == Padding.TILE:
+ assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding"
+ assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution"
+ assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses"
+ npu_op.ifm.tiles = modify_tile_addresses_for_padding(
+ npu_op.ifm.tiles,
+ primary_op.attrs.get("explicit_padding", None),
+ channels=cmd.ps.ifm_shapes[0].depth,
+ dtype=cmd.ifm_tensor.dtype,
+ )
+ top, left, bottom, right = 0, 0, 0, 0
return NpuPadding(top=top, left=left, bottom=bottom, right=right)
+def modify_tile_addresses_for_padding(
+    tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType
+) -> NpuTileBox:
+    """Return a tile box whose four tiles replicate two edges of the IFM by one element.
+
+    Addresses are 16-byte aligned when using the NHCWB16 format, which is required to utilize tiling.
+    The offsets to the top right (TR), bottom left (BL) and bottom right (BR) elements of the IFM are
+    calculated relative to the top left element (offset 0), which is taken from addresses[0].
+
+    Example: 4x4x1 IFM (offsets given in elements)
+        | a b c d | <-- Offset to TR ('d') is (w0-1) = 3
+        | e f g h |
+        | i j k l |
+        | m n o p | <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15
+
+    Parameters:
+        tile_box: Tile box to modify, unpacked as (height_0, height_1, width_0, addresses).
+            Only addresses[0] of the incoming box is used.
+        padding_direction: Explicit padding in the order (Top, Left, Bottom, Right). Exactly one of
+            (1, 1, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0) or (0, 0, 1, 1); a list or a tuple is accepted.
+        channels: Number of IFM channels; rounded up to a multiple of 16 for the row stride.
+        dtype: IFM data type. DataType.int16 uses 2 bytes per element, everything else 1 byte.
+
+    Returns:
+        A new NpuTileBox with the adjusted tile dimensions and integer addresses.
+
+    Raises:
+        AssertionError: If padding_direction is not one of the four supported corner combinations.
+    """
+    h0, h1, w0, addresses = tile_box
+    base = addresses[0]
+    elem_size = 2 if dtype == DataType.int16 else 1
+    tr_offset = (w0 - 1) * 16 * elem_size
+    bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size
+    br_offset = tr_offset + bl_offset
+
+    # A list never compares equal to a tuple, so normalise before matching against the tuple literals
+    # below. None (attribute missing at the caller) is left as is and ends up in the error branch.
+    direction = tuple(padding_direction) if padding_direction is not None else None
+
+    # Explicit padding order: (Top, Left, Bottom, Right)
+    if direction == (1, 1, 0, 0):
+        # Pad top left corner
+        #             | a a b |
+        # | a b | ->  | a a b |
+        # | c d |     | c c d |
+        addresses = [base] * 4
+        h0, h1, w0 = 1, 1, 1
+
+    elif direction == (1, 0, 0, 1):
+        # Pad top right corner
+        #             | a b b |
+        # | a b | ->  | a b b |
+        # | c d |     | c d d |
+        addresses = [base, base + tr_offset, base, base + tr_offset]
+        h0, h1 = 1, 1  # w0 is kept
+
+    elif direction == (0, 1, 1, 0):
+        # Pad bottom left corner
+        # | a b |     | a a b |
+        # | c d | ->  | c c d |
+        #             | c c d |
+        addresses = [base, base, base + bl_offset, base + bl_offset]
+        w0 = 1  # h0 and h1 are kept
+
+    elif direction == (0, 0, 1, 1):
+        # Pad bottom right corner
+        # | a b |     | a b b |
+        # | c d | ->  | c d d |
+        #             | c d d |
+        # h0, h1 and w0 are all kept
+        addresses = [base, base + tr_offset, base + bl_offset, base + br_offset]
+
+    else:
+        # Raised explicitly instead of "assert 0" so that the check is not stripped when running with -O
+        raise AssertionError("Invalid padding direction for tile padding")
+
+    return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses])
+
+
def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int:
base_ptr_idx_map = {
MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
@@ -277,9 +361,6 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh
height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(
box.start_coord, box.end_coord, op_shape4D
)
- for idx, addr in enumerate(addresses):
- if addr is None:
- addresses[idx] = 0
fm.tiles = NpuTileBox(
height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
)
@@ -393,7 +474,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])
if not op.type.is_elementwise_op():
- npu_op.padding = create_padding(cmd, op)
+ npu_op.padding = create_padding(cmd, op, npu_op)
npu_op.kernel = to_npu_kernel(op.kernel)
npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode]
return npu_op
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 54e823a8..de68b1d7 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -394,6 +394,7 @@ class Padding(Enum):
SAME = 0
VALID = 1
EXPLICIT = 2 # Padding is specified in a PAD operation (only used for NPU operations)
+ TILE = 3 # Uses hardware tiles to pad by 1 with edge values on two sides of the IFM specified in explicit_padding
class ActivationFunction:
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index e9815845..65473b8d 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -600,7 +600,7 @@ class Tensor:
1,
1,
1,
- [self.address_for_coordinate(start_coord, op_shape4D=op_shape4D), None, None, None],
+ [self.address_for_coordinate(start_coord, op_shape4D=op_shape4D), 0, 0, 0],
)
if self.is_standard_fm:
@@ -617,7 +617,7 @@ class Tensor:
box_height0 = crossing_y - start_coord[1]
box_width = crossing_x - start_coord[2]
- addresses: List = [None] * 4
+ addresses: List = [0] * 4
addresses[0] = self.address_for_coordinate(start_coord, op_shape4D=op_shape4D)
if end_coord[2] > crossing_x:
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 0f199de0..611046ba 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -238,6 +238,9 @@ def calc_padding_and_skirt(padding_type, kernel, input_shape, explicit_padding):
top, left, bottom, right = explicit_padding
top_pad, bottom_pad = calc_explicit_padding(int(input_shape.height), int(s_y), int(k_h), int(top), int(bottom))
left_pad, right_pad = calc_explicit_padding(int(input_shape.width), int(s_x), int(k_w), int(left), int(right))
+ elif padding_type == Padding.TILE:
+ # The values in the explicit padding only represent the "direction" in which to pad
+ top_pad, left_pad, bottom_pad, right_pad = explicit_padding
else:
raise UnsupportedFeatureError(f"Unsupported padding = {padding_type} for padding calculation")
padding = (top_pad, left_pad, bottom_pad, right_pad)