diff options
author | Johan Alfven <johan.alfven@arm.com> | 2023-02-02 09:07:48 +0100 |
---|---|---|
committer | Johan Alfven <johan.alfven@arm.com> | 2023-03-14 11:00:58 +0100 |
commit | 90724965751e882c58de74a044cc7adab307bc55 (patch) | |
tree | 425ccea87487b66ca298a801b298fbf8567f86d9 /ethosu/vela/live_range.py | |
parent | bb9885190f5f7ea959f171b38ee1dd44d3e1e75e (diff) | |
download | ethos-u-vela-90724965751e882c58de74a044cc7adab307bc55.tar.gz |
MLBEDSW-6260: Add support for using DMA to copy feature maps
- Reshape ops can be bypassed, so there is no need to process them on the NPU.
However, there are use cases where the IFM must be preserved, so a memcpy is needed.
This memcpy is implemented as an AvgPool.
- To reduce the cost of the AvgPool, the IFM can be copied by DMA instead.
This is faster, and it can also be turned into a real NOP in cases where
the IFM and the OFM can use the same memory space.
- Added a new memcpy op. Only the NHWC format is supported, since DMA cannot change
the format on the fly.
- Allow the OFM to reuse the IFM for the memcpy op
- Make sure the DMA copy size is 16-byte aligned
Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/live_range.py')
-rw-r--r-- | ethosu/vela/live_range.py | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py index 05e481e0..995a0ccb 100644 --- a/ethosu/vela/live_range.py +++ b/ethosu/vela/live_range.py @@ -165,16 +165,11 @@ def tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set): def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None): - def _tensor_should_be_ignored(tens): - if tens.ifm_write_protected: - return True - return tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set) - - # Check if possible to merge ifm/ofm live ranges of elementwise op ifm_tens = None if sched_op.op_type.is_elementwise_op(): + # Check if possible to merge ifm/ofm live ranges of elementwise op elem_op = sched_op.parent_op - if not _tensor_should_be_ignored(elem_op.ofm): + if not tensor_should_be_ignored(elem_op.ofm, target_mem_area, target_mem_type_set): # Check if overwriting the inputs can be allowed OpShapeTens = namedtuple("OpShapeTens", ["op_shape", "tens"]) outp = OpShapeTens(elem_op.ofm_shapes[0], elem_op.ofm) @@ -183,7 +178,6 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None): inps.append(OpShapeTens(elem_op.ifm_shapes[0], elem_op.ifm)) if elem_op.ifm2 is not None: inps.append(OpShapeTens(elem_op.ifm_shapes[1], elem_op.ifm2)) - # find an input tensor that can be overwritten by the output for inp in inps: if ( @@ -192,7 +186,8 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None): # check input tensor is valid and inp.tens is not None and inp.tens.shape != [] - and not _tensor_should_be_ignored(inp.tens) + and not inp.tens.ifm_write_protected + and not tensor_should_be_ignored(inp.tens, target_mem_area, target_mem_type_set) # check input and output tensors are compatible and inp.tens.format == outp.tens.format and inp.tens.dtype == outp.tens.dtype @@ -203,6 +198,17 @@ def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None): ): ifm_tens = inp.tens break + elif sched_op.op_type == 
Op.Memcpy: + # Check if possible to merge ifm/ofm live ranges of dma op + dma_op = sched_op.parent_op + ifm = dma_op.ifm + ofm = dma_op.ofm + if not ( + tensor_should_be_ignored(ifm, target_mem_area, target_mem_type_set) + or tensor_should_be_ignored(ofm, target_mem_area, target_mem_type_set) + ): + # Currently DMA only used when bypassing memory only ops so ok to reuse ifm + ifm_tens = ifm return ifm_tens |