aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/live_range.py
diff options
context:
space:
mode:
author: Johan Alfven <johan.alfven@arm.com> 2023-02-02 09:07:48 +0100
committer: Johan Alfven <johan.alfven@arm.com> 2023-03-14 11:00:58 +0100
commit: 90724965751e882c58de74a044cc7adab307bc55 (patch)
tree: 425ccea87487b66ca298a801b298fbf8567f86d9 /ethosu/vela/live_range.py
parent: bb9885190f5f7ea959f171b38ee1dd44d3e1e75e (diff)
download: ethos-u-vela-90724965751e882c58de74a044cc7adab307bc55.tar.gz
MLBEDSW-6260: Add support for using DMA to copy feature maps
- Reshape ops can be bypassed, so there is no need to process them on the NPU. There are use cases where the IFM must be preserved, so a memcpy is needed. This is implemented by an AvgPool.
- In order to reduce the cost of the AvgPool, the IFM can be copied by DMA. This is faster, and it can also be turned into a real NOP in cases where the IFM and the OFM can use the same memory space.
- Added a new memcpy op. Only the NHWC format is supported, since DMA cannot change the format on the fly.
- Allow the OFM to reuse the IFM for the memcpy op.
- Make sure the DMA copy size is 16-byte aligned.

Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/live_range.py')
-rw-r--r-- ethosu/vela/live_range.py 24
1 files changed, 15 insertions, 9 deletions
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 05e481e0..995a0ccb 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -165,16 +165,11 @@ def tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set):
def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
- def _tensor_should_be_ignored(tens):
- if tens.ifm_write_protected:
- return True
- return tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set)
-
- # Check if possible to merge ifm/ofm live ranges of elementwise op
ifm_tens = None
if sched_op.op_type.is_elementwise_op():
+ # Check if possible to merge ifm/ofm live ranges of elementwise op
elem_op = sched_op.parent_op
- if not _tensor_should_be_ignored(elem_op.ofm):
+ if not tensor_should_be_ignored(elem_op.ofm, target_mem_area, target_mem_type_set):
# Check if overwriting the inputs can be allowed
OpShapeTens = namedtuple("OpShapeTens", ["op_shape", "tens"])
outp = OpShapeTens(elem_op.ofm_shapes[0], elem_op.ofm)
@@ -183,7 +178,6 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
inps.append(OpShapeTens(elem_op.ifm_shapes[0], elem_op.ifm))
if elem_op.ifm2 is not None:
inps.append(OpShapeTens(elem_op.ifm_shapes[1], elem_op.ifm2))
-
# find an input tensor that can be overwritten by the output
for inp in inps:
if (
@@ -192,7 +186,8 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
# check input tensor is valid
and inp.tens is not None
and inp.tens.shape != []
- and not _tensor_should_be_ignored(inp.tens)
+ and not inp.tens.ifm_write_protected
+ and not tensor_should_be_ignored(inp.tens, target_mem_area, target_mem_type_set)
# check input and output tensors are compatible
and inp.tens.format == outp.tens.format
and inp.tens.dtype == outp.tens.dtype
@@ -203,6 +198,17 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
):
ifm_tens = inp.tens
break
+ elif sched_op.op_type == Op.Memcpy:
+ # Check if possible to merge ifm/ofm live ranges of dma op
+ dma_op = sched_op.parent_op
+ ifm = dma_op.ifm
+ ofm = dma_op.ofm
+ if not (
+ tensor_should_be_ignored(ifm, target_mem_area, target_mem_type_set)
+ or tensor_should_be_ignored(ofm, target_mem_area, target_mem_type_set)
+ ):
+ # Currently DMA only used when bypassing memory only ops so ok to reuse ifm
+ ifm_tens = ifm
return ifm_tens