about summary refs log tree commit diff
path: root/ethosu/vela/tflite_graph_optimiser.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r-- ethosu/vela/tflite_graph_optimiser.py 125
1 files changed, 125 insertions, 0 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index cc947bc..ad979bd 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1881,6 +1881,130 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng) -> Operation:
return op
+def convert_mirror_pad(op: Operation, arch, nng):  # Rewrite a MirrorPad op as a chain of create_avgpool_nop ops; always returns op
+ if op.type != Op.MirrorPad or not op.run_on_npu:  # only MirrorPad ops flagged to run on the NPU are rewritten
+ return op
+ # Overview: copy the ifm into place, mirror the top/bot rows with a negative height stride, then (only when padding the width) mirror the left/right columns with a negative width stride and copy the body into the ofm
+ _, (top, bot), (left, right), _ = op.ifm2.values  # pad amounts come from the second input; first and last pairs are ignored (presumably batch and channel padding - confirm)
+ mode = op.attrs["mode"] # 0 = reflect, 1 = symmetric
+ # arch and nng are not referenced in this function; NOTE(review): presumably kept to match the signature of the other rewrite passes - confirm
+ ifm = op.ifm
+ ofm = op.ofm
+ ofm.ops = []  # clear the ofm's op list; the replacement pools are added to it below
+ elem_size = 2 if ofm.dtype == DataType.int16 else 1  # scales the tile base offsets below: 2 for int16, 1 for every other dtype
+ n, h, w, c = ifm.shape
+ _, oh, ow, _ = ofm.shape  # only the output height and width are needed
+ # Force linear format on OFM to allow negative stride multipliers
+ ofm.force_linear_format = True
+
+ # Intermediate ofm needed to store ifm padded with top and bot values as input to the left and right padding
+ intermediate_ofm_tens = Tensor([n, h + top + bot, w, c], ofm.dtype, "intermediate_ofm_tens")
+ intermediate_ofm_tens.quantization = op.outputs[0].quantization.clone()
+ intermediate_ofm_tens.force_linear_format = True
+
+ # If there is no left or right padding, we can write directly to the ofm without an intermediate tensor
+ if not (left or right):
+ intermediate_ofm_tens = ofm  # the tensor created above is discarded in this case
+
+ # Initial op to copy the ifm into the middle of the intermediate ofm
+ avg_pool_init = create_avgpool_nop("init_pool")
+ avg_pool_init.write_shape = Shape4D(n, h, w, c)
+ avg_pool_init.write_offset = Shape4D(0, top, 0, 0)  # skip the first "top" rows, which the top mirror pool fills
+ avg_pool_init.read_shapes[0] = Shape4D(n, h, w, c)
+ avg_pool_init.read_offsets[0] = Shape4D(0, 0, 0, 0)
+ avg_pool_init.add_input_tensor(ifm)
+ avg_pool_init.set_output_tensor(intermediate_ofm_tens)
+ avg_pool_init.set_ifm_ofm_shapes()
+ DebugDatabase.add_optimised(op, avg_pool_init)
+
+ # Create pools with negative stride to mirror edges and offset to write at padding positions
+ avg_pool_pad = create_avgpool_nop("pad_pool")
+ for i, pad_amount in enumerate([top, bot, left, right]):  # i: 0 = top, 1 = bot, 2 = left, 3 = right
+ # Clear input from previous cloned op
+ avg_pool_pad.inputs = []
+ if not pad_amount:  # nothing to mirror on this side; the same op object is reused for the next side
+ continue
+
+ if i == 0: # top
+ # Set read and write shape width to full ifm width and height to "top" pad size
+ avg_pool_pad.write_shape = Shape4D(n, top, w, c)
+ avg_pool_pad.read_shapes[0] = Shape4D(n, top, w, c)
+ # Leave read offset as default to read the top chunk of the ifm
+ # For reflect mode, shift height offset down one step to "skip" the edge
+ avg_pool_pad.read_offsets[0] = Shape4D(0, 0, 0, 0) if mode == 1 else Shape4D(0, 1, 0, 0)
+ # Offset the base address of tile 0 to start writing just above the ifm that was copied into the middle of
+ # the ofm and use negative height striding to mirror the above ifm chunk
+ avg_pool_pad.tile_base_offsets_ofm[0] = ((top - 1) * w) * c * elem_size  # offset of row top - 1, the last padding row above the copied ifm
+ if i == 1: # bot
+ # Set read and write shape width to full ifm width and height to "bot" pad size
+ avg_pool_pad.write_shape = Shape4D(n, bot, w, c)
+ avg_pool_pad.read_shapes[0] = Shape4D(n, bot, w, c)
+ # Set read offset to read the bottom chunk of the ifm
+ # For reflect mode, shift height offset up one step to "skip" the edge
+ avg_pool_pad.read_offsets[0] = Shape4D(0, h - bot, 0, 0) if mode == 1 else Shape4D(0, h - bot - 1, 0, 0)
+ # Offset the base address of tile 0 to start writing at the very bottom of the ofm and use negative height
+ # striding to mirror the above ifm chunk
+ avg_pool_pad.tile_base_offsets_ofm[0] = (oh - 1) * w * c * elem_size  # offset of row oh - 1, the last row (row pitch is w * c elements)
+ if i == 2: # left
+ # Set read and write shape height to full intermediate ofm height and width to "left" pad size
+ avg_pool_pad.write_shape = Shape4D(n, h + top + bot, left, c)
+ avg_pool_pad.read_shapes[0] = Shape4D(n, h + top + bot, left, c)
+ # Leave read offset as default to read the leftmost chunk of the intermediate ofm
+ # For reflect mode, shift width offset one step to the right to "skip" the edge
+ avg_pool_pad.read_offsets[0] = Shape4D(0, 0, 0, 0) if mode == 1 else Shape4D(0, 0, 1, 0)
+ # Offset the base address of tile 0 to start writing in the left padding columns of the ofm and use negative
+ # width striding to mirror the chunk read from the intermediate ofm
+ avg_pool_pad.tile_base_offsets_ofm[0] = (left - 1) * c * elem_size  # offset of column left - 1 in the first row
+ if i == 3: # right
+ # Set read and write shape height to full intermediate ofm height and width to "right" pad size
+ avg_pool_pad.write_shape = Shape4D(n, h + top + bot, right, c)
+ avg_pool_pad.read_shapes[0] = Shape4D(n, h + top + bot, right, c)
+ # Set read offset to read the rightmost chunk of the intermediate ofm
+ # For reflect mode, shift width offset one step to the left to "skip" the edge
+ avg_pool_pad.read_offsets[0] = Shape4D(0, 0, w - right, 0) if mode == 1 else Shape4D(0, 0, w - right - 1, 0)
+ # Offset the base address of tile 0 to start writing at the rightmost part of the ofm and use negative
+ # width striding to mirror the chunk read from the intermediate ofm
+ avg_pool_pad.tile_base_offsets_ofm[0] = (ow - 1) * c * elem_size  # offset of column ow - 1 in the first row
+
+ # Write offset (0,0,0,0) for all pad pools; the write position is set through the tile base offset above
+ avg_pool_pad.write_offset = Shape4D(0, 0, 0, 0)
+
+ if i in [0, 1]: # negative height stride for top and bot, negative width stride for left and right
+ avg_pool_pad.ofm_stride_multiplier = [1, -1, 1] # C/H/W
+ # top and bot reads from ifm and writes to intermediate ofm
+ avg_pool_pad.add_input_tensor(ifm)
+ intermediate_ofm_tens.ops.append(avg_pool_pad)  # appended by hand, unlike init_pool which uses set_output_tensor
+ avg_pool_pad.outputs = [intermediate_ofm_tens]
+ else:
+ avg_pool_pad.ofm_stride_multiplier = [1, 1, -1] # C/H/W
+ # left and right reads from intermediate ofm and writes to ofm
+ avg_pool_pad.add_input_tensor(intermediate_ofm_tens)
+ ofm.ops.append(avg_pool_pad)
+ avg_pool_pad.outputs = [ofm]
+
+ avg_pool_pad.set_ifm_ofm_shapes()
+ DebugDatabase.add_optimised(op, avg_pool_pad)
+
+ # Clone operation for next padding direction
+ avg_pool_pad = avg_pool_pad.clone(f"_{i}")  # NOTE(review): the clone made after the last non-zero side is never wired to any tensor
+
+ if left or right:
+ # Copy intermediate ofm into final ofm
+ avg_pool_final_copy = create_avgpool_nop("avg_pool_final_copy")
+ avg_pool_final_copy.write_shape = Shape4D(n, h + top + bot, w, c)
+ avg_pool_final_copy.write_offset = Shape4D(0, 0, left, 0)  # leave room for the "left" columns written by the left mirror pool
+ avg_pool_final_copy.read_shapes[0] = Shape4D(n, h + top + bot, w, c)
+ avg_pool_final_copy.read_offsets[0] = Shape4D(0, 0, 0, 0)
+
+ avg_pool_final_copy.add_input_tensor(intermediate_ofm_tens)
+ ofm.ops.append(avg_pool_final_copy)
+ avg_pool_final_copy.outputs = [ofm]
+ avg_pool_final_copy.set_ifm_ofm_shapes()
+ DebugDatabase.add_optimised(op, avg_pool_final_copy)
+
+ return op  # the original op object is returned on every path
+
+
def convert_pad(op: Operation, arch, nng):
"""
Rewrites PAD operator to an average pool that copies the IFM to the OFM
@@ -2899,6 +3023,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
convert_mul_max_to_abs_or_lrelu,
convert_lrelu,
convert_avg_pool_to_conv2d,
+ convert_mirror_pad,
fixup_strided_conv,
convert_hardswish_to_lut,
rewrite_fully_connected_input,