aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRickard Bolin <rickard.bolin@arm.com>2023-09-05 11:38:19 +0000
committerRickard Bolin <rickard.bolin@arm.com>2024-01-30 14:14:52 +0000
commitfdbb072dacae339dd3f8efd3fb70fa84b9296033 (patch)
tree36d5704fc2e99f429bd3662f15916335e97e27f8
parent014bc28324014dc82e3b352d7a81df9ed0338c56 (diff)
downloadethos-u-vela-fdbb072dacae339dd3f8efd3fb70fa84b9296033.tar.gz
MLBEDSW-8491: Add support for Mirror pad
Change-Id: I3c13118e14195a5fb8e522a38b205b75fb07b74b
Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
-rw-r--r--ethosu/vela/operation.py2
-rw-r--r--ethosu/vela/range_set.py1
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py125
-rw-r--r--ethosu/vela/tflite_mapping.py6
-rw-r--r--ethosu/vela/tflite_model_semantic.py3
-rw-r--r--ethosu/vela/tflite_supported_operators.py31
6 files changed, 160 insertions, 8 deletions
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 9b4149f..a831537 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -225,7 +225,7 @@ class Op(Enum):
Mean = OperatorInfo(indices=NNG_IFM_INDICES)
Min = OperatorInfo()
Minimum = OperatorInfo(block_type=NpuBlockType.ElementWise, indices=NNG_IFM_IFM2_INDICES)
- MirrorPad = OperatorInfo()
+ MirrorPad = OperatorInfo(indices=NNG_IFM_IFM2_INDICES)
Mul = OperatorInfo(block_type=NpuBlockType.ElementWise, indices=NNG_IFM_IFM2_INDICES)
Neg = OperatorInfo()
NonMaxSuppressionV4 = OperatorInfo()
diff --git a/ethosu/vela/range_set.py b/ethosu/vela/range_set.py
index 1a00373..10a5c83 100644
--- a/ethosu/vela/range_set.py
+++ b/ethosu/vela/range_set.py
@@ -31,7 +31,6 @@ class RangeSet:
self.ranges = ranges # track a list of (start, end) tuples, always in ascending order sorted by start.
if start is not None and start != end:
- assert start < end
self.ranges.append((start, end))
def __or__(self, other):
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index cc947bc..ad979bd 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1881,6 +1881,130 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng) -> Operation:
return op
def convert_mirror_pad(op: Operation, arch, nng):
    """Rewrite a MirrorPad op into a sequence of AvgPool copy ops.

    The IFM is first copied into the vertical middle of an intermediate
    tensor, then the top/bottom borders are filled by re-reading the IFM's
    edge rows with a negative OFM height-stride multiplier (mirroring), and
    finally the left/right borders are filled by re-reading the edge columns
    of the intermediate tensor with a negative width-stride multiplier.

    op.attrs["mode"]: 0 = REFLECT (edge row/column not repeated),
    1 = SYMMETRIC (edge row/column repeated).

    Returns the (consumed) original op; the OFM's producers are replaced by
    the generated AvgPool ops.
    """
    if op.type != Op.MirrorPad or not op.run_on_npu:
        return op

    # ifm2 is the paddings tensor: 4 rows of (before, after) pairs, one per
    # NHWC dimension. Batch and channel padding are assumed to be zero here
    # (presumably enforced by the padding-dimension constraint checks — the
    # first and last rows are simply discarded).
    _, (top, bot), (left, right), _ = op.ifm2.values
    mode = op.attrs["mode"]  # 0 = reflect, 1 = symmetric

    ifm = op.ifm
    ofm = op.ofm
    ofm.ops = []  # detach the original MirrorPad as producer of the OFM
    elem_size = 2 if ofm.dtype == DataType.int16 else 1
    n, h, w, c = ifm.shape
    _, oh, ow, _ = ofm.shape
    # Force linear format on OFM to allow negative stride multipliers
    ofm.force_linear_format = True

    # Intermediate ofm needed to store ifm padded with top and bot values as input to the left and right padding
    intermediate_ofm_tens = Tensor([n, h + top + bot, w, c], ofm.dtype, "intermediate_ofm_tens")
    intermediate_ofm_tens.quantization = op.outputs[0].quantization.clone()
    intermediate_ofm_tens.force_linear_format = True

    # If there is no left or right padding, we can write directly to the ofm without an intermediate tensor
    if not (left or right):
        intermediate_ofm_tens = ofm

    # Initial op to copy the ifm into the middle of the intermediate ofm
    avg_pool_init = create_avgpool_nop("init_pool")
    avg_pool_init.write_shape = Shape4D(n, h, w, c)
    avg_pool_init.write_offset = Shape4D(0, top, 0, 0)
    avg_pool_init.read_shapes[0] = Shape4D(n, h, w, c)
    avg_pool_init.read_offsets[0] = Shape4D(0, 0, 0, 0)
    avg_pool_init.add_input_tensor(ifm)
    avg_pool_init.set_output_tensor(intermediate_ofm_tens)
    avg_pool_init.set_ifm_ofm_shapes()
    DebugDatabase.add_optimised(op, avg_pool_init)

    # Create pools with negative stride to mirror edges and offset to write at padding positions
    avg_pool_pad = create_avgpool_nop("pad_pool")
    for i, pad_amount in enumerate([top, bot, left, right]):
        # Clear input from previous cloned op
        avg_pool_pad.inputs = []
        if not pad_amount:
            continue

        if i == 0:  # top
            # Set read and write shape width to full ifm width and height to "top" pad size
            avg_pool_pad.write_shape = Shape4D(n, top, w, c)
            avg_pool_pad.read_shapes[0] = Shape4D(n, top, w, c)
            # Leave read offset as default to read the top chunk of the ifm
            # For reflect mode, shift height offset down one step to "skip" the edge
            avg_pool_pad.read_offsets[0] = Shape4D(0, 0, 0, 0) if mode == 1 else Shape4D(0, 1, 0, 0)
            # Offset the base address of tile 0 to start writing just above the ifm that was copied into the middle of
            # the ofm and use negative height striding to mirror the above ifm chunk
            avg_pool_pad.tile_base_offsets_ofm[0] = ((top - 1) * w) * c * elem_size
        if i == 1:  # bot
            # Set read and write shape width to full ifm width and height to "bot" pad size
            avg_pool_pad.write_shape = Shape4D(n, bot, w, c)
            avg_pool_pad.read_shapes[0] = Shape4D(n, bot, w, c)
            # Set read offset to read the bottom chunk of the ifm
            # For reflect mode, shift height offset up one step to "skip" the edge
            avg_pool_pad.read_offsets[0] = Shape4D(0, h - bot, 0, 0) if mode == 1 else Shape4D(0, h - bot - 1, 0, 0)
            # Offset the base address of tile 0 to start writing at the very bottom of the ofm and use negative height
            # striding to mirror the above ifm chunk
            avg_pool_pad.tile_base_offsets_ofm[0] = (oh - 1) * w * c * elem_size
        if i == 2:  # left
            # Set read and write shape height to full intermediate ofm height and width to "left" pad size
            avg_pool_pad.write_shape = Shape4D(n, h + top + bot, left, c)
            avg_pool_pad.read_shapes[0] = Shape4D(n, h + top + bot, left, c)
            # Leave read offset as default to read the leftmost chunk of the intermediate ofm
            # For reflect mode, shift width offset one step to the right to "skip" the edge
            avg_pool_pad.read_offsets[0] = Shape4D(0, 0, 0, 0) if mode == 1 else Shape4D(0, 0, 1, 0)
            # Offset the base address of tile 0 to start writing just left of the intermediate ofm and use negative
            # width striding to mirror the above ifm chunk
            avg_pool_pad.tile_base_offsets_ofm[0] = (left - 1) * c * elem_size
        if i == 3:  # right
            # Set read and write shape height to full intermediate ofm height and width to "right" pad size
            avg_pool_pad.write_shape = Shape4D(n, h + top + bot, right, c)
            avg_pool_pad.read_shapes[0] = Shape4D(n, h + top + bot, right, c)
            # Set read offset to read the rightmost chunk of the intermediate ofm
            # For reflect mode, shift width offset one step to the left to "skip" the edge
            avg_pool_pad.read_offsets[0] = Shape4D(0, 0, w - right, 0) if mode == 1 else Shape4D(0, 0, w - right - 1, 0)
            # Offset the base address of tile 0 to start writing at the rightmost part of the ofm and use negative
            # width striding to mirror the above ifm chunk
            avg_pool_pad.tile_base_offsets_ofm[0] = (ow - 1) * c * elem_size

        # Write offset (0,0,0,0) for all convs
        avg_pool_pad.write_offset = Shape4D(0, 0, 0, 0)

        if i in [0, 1]:  # negative height stride for top and bot, negative width stride for left and right
            avg_pool_pad.ofm_stride_multiplier = [1, -1, 1]  # C/H/W
            # top and bot reads from ifm and writes to intermediate ofm
            avg_pool_pad.add_input_tensor(ifm)
            intermediate_ofm_tens.ops.append(avg_pool_pad)
            avg_pool_pad.outputs = [intermediate_ofm_tens]
        else:
            avg_pool_pad.ofm_stride_multiplier = [1, 1, -1]  # C/H/W
            # left and right reads from intermediate ofm and writes to ofm
            avg_pool_pad.add_input_tensor(intermediate_ofm_tens)
            ofm.ops.append(avg_pool_pad)
            avg_pool_pad.outputs = [ofm]

        avg_pool_pad.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, avg_pool_pad)

        # Clone operation for next padding direction
        avg_pool_pad = avg_pool_pad.clone(f"_{i}")

    if left or right:
        # Copy intermediate ofm into final ofm
        avg_pool_final_copy = create_avgpool_nop("avg_pool_final_copy")
        avg_pool_final_copy.write_shape = Shape4D(n, h + top + bot, w, c)
        avg_pool_final_copy.write_offset = Shape4D(0, 0, left, 0)
        avg_pool_final_copy.read_shapes[0] = Shape4D(n, h + top + bot, w, c)
        avg_pool_final_copy.read_offsets[0] = Shape4D(0, 0, 0, 0)

        avg_pool_final_copy.add_input_tensor(intermediate_ofm_tens)
        ofm.ops.append(avg_pool_final_copy)
        avg_pool_final_copy.outputs = [ofm]
        avg_pool_final_copy.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, avg_pool_final_copy)

    return op
+
+
def convert_pad(op: Operation, arch, nng):
"""
Rewrites PAD operator to an average pool that copies the IFM to the OFM
@@ -2899,6 +3023,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
convert_mul_max_to_abs_or_lrelu,
convert_lrelu,
convert_avg_pool_to_conv2d,
+ convert_mirror_pad,
fixup_strided_conv,
convert_hardswish_to_lut,
rewrite_fully_connected_input,
diff --git a/ethosu/vela/tflite_mapping.py b/ethosu/vela/tflite_mapping.py
index 0faa079..c599019 100644
--- a/ethosu/vela/tflite_mapping.py
+++ b/ethosu/vela/tflite_mapping.py
@@ -867,7 +867,11 @@ builtin_operator_map = {
OptionsSerializer("SquaredDifferenceOptions"),
TFLITE_IFM_IFM2_INDICES,
),
- BuiltinOperator.MIRROR_PAD: (Op.MirrorPad, OptionsSerializer("MirrorPadOptions", ("mode",)), TFLITE_NO_INDICES),
+ BuiltinOperator.MIRROR_PAD: (
+ Op.MirrorPad,
+ OptionsSerializer("MirrorPadOptions", ("mode",)),
+ TFLITE_IFM_IFM2_INDICES,
+ ),
BuiltinOperator.ABS: (Op.Abs, OptionsSerializer("AbsOptions"), TFLITE_IFM_INDICES),
BuiltinOperator.SPLIT_V: (Op.SplitV, OptionsSerializer("SplitVOptions", ("num_splits",)), TFLITE_IFM_INDICES),
BuiltinOperator.UNIQUE: (
diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py
index eff40bc..1c258de 100644
--- a/ethosu/vela/tflite_model_semantic.py
+++ b/ethosu/vela/tflite_model_semantic.py
@@ -258,6 +258,9 @@ class TFLiteSemantic:
Op.Transpose: [
TFLiteSemantic.constraint_tens_quant_none_check,
],
+ Op.MirrorPad: [
+ TFLiteSemantic.constraint_tens_quant_none_check,
+ ],
}
return generic_constraints_exclude_list
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 48813fe..ad61fca 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -106,10 +106,17 @@ class TFLiteSupportedOperators:
)
)
binary_elem_wise_main_ops = binary_elem_wise_min_max_ops | binary_elem_wise_add_mul_sub | binary_elem_wise_shift_ops
+
elem_wise_main_ops = binary_elem_wise_main_ops | unary_elem_wise_main_ops | set((Op.SquaredDifference,))
- pad_ops = set((Op.Pad,))
+ pad_ops = set(
+ (
+ Op.Pad,
+ Op.MirrorPad,
+ )
+ )
+
supported_int32_tensor_ops = (
- set((Op.ReduceSum, Op.CLZ, Op.Shape, Op.ArgMax, Op.Transpose))
+ set((Op.ReduceSum, Op.CLZ, Op.Shape, Op.ArgMax, Op.Transpose, Op.MirrorPad))
| binary_elem_wise_add_mul_sub
| binary_elem_wise_shift_ops
)
@@ -312,9 +319,13 @@ class TFLiteSupportedOperators:
self.specific_constraints[Op.StridedSlice].append(TFLiteSupportedOperators.constraint_stridedslice_offset_false)
# Pad specific checks:
- self.specific_constraints[Op.Pad].append(TFLiteSupportedOperators.constraint_pad_shape)
- self.specific_constraints[Op.Pad].append(TFLiteSupportedOperators.constraint_padding_dimensions)
- self.specific_constraints[Op.Pad].append(TFLiteSupportedOperators.constraint_pad_type)
+ for op_type in TFLiteSupportedOperators.pad_ops:
+ self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_pad_shape)
+ self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_padding_dimensions)
+ self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_pad_type)
+
+ # Mirror pad specific checks:
+ self.specific_constraints[Op.MirrorPad].append(TFLiteSupportedOperators.constraint_mirror_pad_padding_values)
# Mean specific checks:
self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product)
@@ -818,6 +829,16 @@ class TFLiteSupportedOperators:
return valid, f"First dimension padding: {pad_tensor[0,:]}, last dimension padding: {pad_tensor[-1,:]}"
@staticmethod
def constraint_mirror_pad_padding_values(op):
    "The number of pad values for each direction must not be larger than the ifm size in that dimension"
    # NOTE: the docstring above is the user-facing constraint description
    # (harvested for the supported-operators report) — keep it intact.
    valid = True
    # inputs[1] is the paddings tensor: one (before, after) row per IFM dim
    pad_tensor = op.inputs[1].values
    ifm_shape = op.inputs[0].shape
    # Pair each padding row with its corresponding IFM dimension size.
    # (The original `enumerate(pad_tensor, ifm_shape)` was a TypeError:
    # enumerate's second argument must be an int start index, not a list.)
    for dim_padding, ifm_dim_shape in zip(pad_tensor, ifm_shape):
        # dim_padding is array-like (before, after), so `any` flags a
        # violation by either pad amount in this dimension
        if any(dim_padding > ifm_dim_shape):
            valid = False
    return valid, f"IFM shape: {ifm_shape}, number of padding values per dimension: {pad_tensor}"
+
+ @staticmethod
def constraint_stridedslice_stride_values(op):
"All Strides values must be 1"
strides = op.inputs[3]