aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/high_level_command_stream_generator.py
diff options
context:
space:
mode:
authorJohan Alfven <johan.alfven@arm.com>2023-02-02 09:07:48 +0100
committerJohan Alfven <johan.alfven@arm.com>2023-03-14 11:00:58 +0100
commit90724965751e882c58de74a044cc7adab307bc55 (patch)
tree425ccea87487b66ca298a801b298fbf8567f86d9 /ethosu/vela/high_level_command_stream_generator.py
parentbb9885190f5f7ea959f171b38ee1dd44d3e1e75e (diff)
downloadethos-u-vela-90724965751e882c58de74a044cc7adab307bc55.tar.gz
MLBEDSW-6260: Add support for using DMA to copy feature maps
- Reshape ops can be bypassed, so there is no need to process them on the NPU. However, there are use cases where the IFM must be preserved, so a memcpy is needed. This is implemented by an AvgPool. - In order to reduce the cost of the AvgPool, the IFM can be copied by DMA instead. This is faster, and it can also be turned into a real NOP in cases where the IFM and the OFM can use the same memory space. - Added new memcpy op. Only the NHWC format is supported, since DMA cannot change the format on the fly. - Allow the OFM to reuse the IFM for the memcpy op. - Make sure the DMA copy size is 16-byte aligned. Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7 Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/high_level_command_stream_generator.py')
-rw-r--r--ethosu/vela/high_level_command_stream_generator.py55
1 files changed, 36 insertions, 19 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 5f6a93a3..770241bc 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -18,6 +18,7 @@
# Generate a high-level command stream from a schedule
from .high_level_command_stream import Box
from .high_level_command_stream import DMA
+from .high_level_command_stream import NOP
from .high_level_command_stream import NpuStripe
from .numeric_util import round_up_divide
from .operation import create_activation_function
@@ -33,6 +34,19 @@ def dma_if_necessary(ps, box, tensor):
yield DMA(ps, src_tensor, tensor, box)
+def dma_feature_map_if_necessary(ps, src_tensor, dst_tensor):
+ # Generator that yields exactly one high-level command for copying src_tensor to dst_tensor:
+ # a DMA when the two tensors differ in start address or memory area, otherwise a NOP.
+ #
+ # The box is built from an all-zero coordinate and the source tensor's shape
+ # (presumably the start/end coordinates covering the whole tensor - confirm against Box).
+ # NOTE(review): the box is derived from src_tensor only; dst_tensor's shape is not checked here.
+ box = Box([0] * len(src_tensor.shape), list(src_tensor.shape))
+ # Both addresses are looked up at the same coordinate (the box start) so they can be compared
+ src_addr = src_tensor.address_for_coordinate(box.start_coord)
+ dst_addr = dst_tensor.address_for_coordinate(box.start_coord)
+
+ # Equal addresses only mean "same data" when the memory areas are also equal
+ if src_addr != dst_addr or src_tensor.mem_area != dst_tensor.mem_area:
+ yield DMA(ps, src_tensor, dst_tensor, box)
+ else:
+ # Source and destination are the same, so no DMA transaction is needed.
+ # Create a NOP for visibility when printing the high_level_command_stream
+ yield NOP(ps, src_tensor, dst_tensor)
+
+
def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
res = []
# sg.sched_ops are ordered by execution
@@ -224,21 +238,24 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
lut_dma_done = True
yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)
- yield NpuStripe(
- sched_op.parent_ps,
- block_config.old_style_representation(),
- is_first_h_stripe,
- is_last_h_stripe,
- ifm_tensor,
- ifm_box,
- ofm_tensor,
- ofm_box,
- weight_tensor,
- weight_box,
- scale_tensor,
- ifm2_tensor=ifm2_tensor,
- ifm2_box=ifm2_box,
- pad_top=pad_top,
- pad_bottom=pad_bottom,
- reversed_operands=sched_op.reversed_operands,
- )
+ if parent_op.type == Op.Memcpy:
+ yield from dma_feature_map_if_necessary(sched_op.parent_ps, ifm_tensor, ofm_tensor)
+ else:
+ yield NpuStripe(
+ sched_op.parent_ps,
+ block_config.old_style_representation(),
+ is_first_h_stripe,
+ is_last_h_stripe,
+ ifm_tensor,
+ ifm_box,
+ ofm_tensor,
+ ofm_box,
+ weight_tensor,
+ weight_box,
+ scale_tensor,
+ ifm2_tensor=ifm2_tensor,
+ ifm2_box=ifm2_box,
+ pad_top=pad_top,
+ pad_bottom=pad_bottom,
+ reversed_operands=sched_op.reversed_operands,
+ )