From 90724965751e882c58de74a044cc7adab307bc55 Mon Sep 17 00:00:00 2001
From: Johan Alfven
Date: Thu, 2 Feb 2023 09:07:48 +0100
Subject: MLBEDSW-6260: Add support for using DMA to copy feature maps

- Reshape ops can be bypassed and there is no need to process
  them by the NPU. There are use cases when the IFM must be
  preserved so a memcpy is needed. This is implemented by an
  AvgPool.
- In order to reduce the cost of the AvgPool the IFM can be
  copied by DMA. This is faster and also it can be turned into
  a real NOP in cases where the IFM and the OFM can use the
  same memory space.
- Added new memcpy op. Only NHWC format supported since DMA
  can not change the format on the fly.
- Allow ofm to reuse ifm for memcpy op
- Make sure the DMA copy size is 16 byte aligned

Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7
Signed-off-by: Johan Alfven
---
 ethosu/vela/live_range.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'ethosu/vela/live_range.py')

diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 05e481e0..995a0ccb 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -165,16 +165,11 @@ def tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set):
 
 
 def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
-    def _tensor_should_be_ignored(tens):
-        if tens.ifm_write_protected:
-            return True
-        return tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set)
-
-    # Check if possible to merge ifm/ofm live ranges of elementwise op
     ifm_tens = None
     if sched_op.op_type.is_elementwise_op():
+        # Check if possible to merge ifm/ofm live ranges of elementwise op
         elem_op = sched_op.parent_op
-        if not _tensor_should_be_ignored(elem_op.ofm):
+        if not tensor_should_be_ignored(elem_op.ofm, target_mem_area, target_mem_type_set):
             # Check if overwriting the inputs can be allowed
             OpShapeTens = namedtuple("OpShapeTens", ["op_shape", "tens"])
             outp = OpShapeTens(elem_op.ofm_shapes[0], elem_op.ofm)
@@ -183,7 +178,6 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
                 inps.append(OpShapeTens(elem_op.ifm_shapes[0], elem_op.ifm))
             if elem_op.ifm2 is not None:
                 inps.append(OpShapeTens(elem_op.ifm_shapes[1], elem_op.ifm2))
-
             # find an input tensor that can be overwritten by the output
             for inp in inps:
                 if (
@@ -192,7 +186,8 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
                     # check input tensor is valid
                     and inp.tens is not None
                     and inp.tens.shape != []
-                    and not _tensor_should_be_ignored(inp.tens)
+                    and not inp.tens.ifm_write_protected
+                    and not tensor_should_be_ignored(inp.tens, target_mem_area, target_mem_type_set)
                     # check input and output tensors are compatible
                     and inp.tens.format == outp.tens.format
                     and inp.tens.dtype == outp.tens.dtype
@@ -203,6 +198,17 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
                 ):
                     ifm_tens = inp.tens
                     break
+    elif sched_op.op_type == Op.Memcpy:
+        # Check if possible to merge ifm/ofm live ranges of dma op
+        dma_op = sched_op.parent_op
+        ifm = dma_op.ifm
+        ofm = dma_op.ofm
+        if not (
+            tensor_should_be_ignored(ifm, target_mem_area, target_mem_type_set)
+            or tensor_should_be_ignored(ofm, target_mem_area, target_mem_type_set)
+        ):
+            # Currently DMA only used when bypassing memory only ops so ok to reuse ifm
+            ifm_tens = ifm
 
     return ifm_tens
 
-- 
cgit v1.2.1
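
Note on the "DMA copy size is 16 byte aligned" point in the commit message: the snippet below is only a minimal sketch of the usual round-up-to-alignment arithmetic that such a requirement implies. The constant DMA_COPY_ALIGNMENT and the helper align_dma_copy_size are assumed names for this illustration and are not part of this patch or of the Vela code base.

# Illustrative sketch only: round a DMA copy size up to a 16 byte boundary.
# DMA_COPY_ALIGNMENT and align_dma_copy_size are assumed names, not Vela code.
DMA_COPY_ALIGNMENT = 16  # bytes, taken from the commit message


def align_dma_copy_size(size_in_bytes: int) -> int:
    # Round up to the next multiple of DMA_COPY_ALIGNMENT.
    return (size_in_bytes + DMA_COPY_ALIGNMENT - 1) // DMA_COPY_ALIGNMENT * DMA_COPY_ALIGNMENT


# Example: a 100 byte feature map copy would be issued as a 112 byte transfer.
assert align_dma_copy_size(100) == 112
assert align_dma_copy_size(16) == 16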