Diffstat (limited to 'ethosu/vela/insert_dma.py')
-rw-r--r-- ethosu/vela/insert_dma.py | 48
1 file changed, 27 insertions(+), 21 deletions(-)
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index 7049a05f..5c05fc8f 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -19,6 +19,7 @@ from . import rewrite_graph
 from .operation import NpuBlockType
 from .operation import Operation
 from .tensor import MemArea
+from .tensor import MemType
 from .tensor import TensorPurpose
@@ -30,29 +31,34 @@ def insert_dma_cmd(op, arch):
         return op  # Already rewritten
     for idx, tens in enumerate(op.inputs):

-        if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and tens.mem_area != arch.fast_storage_mem_area:
-            if tens.purpose == TensorPurpose.Weights or (
-                tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
+        if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
+            # Tensor is in permanent storage
+            # Moving the data is only worthwhile when permanent storage differs from fast storage
+            if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and (
+                arch.permanent_storage_mem_area != arch.fast_storage_mem_area
             ):
-                only_vector_product_consumers = True
-                for oper in tens.consumers():
-                    if oper is None or oper.attrs.get("npu_block_type") != NpuBlockType.VectorProduct:
-                        only_vector_product_consumers = False
-                        break
+                if tens.purpose == TensorPurpose.Weights or (
+                    tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
+                ):
+                    only_vector_product_consumers = True
+                    for oper in tens.consumers():
+                        if oper is None or oper.attrs.get("npu_block_type") != NpuBlockType.VectorProduct:
+                            only_vector_product_consumers = False
+                            break

-                # Tensor products has no need for DMA, tensors are only read once and can be in flash.
-                # Other operations re-reads tensors, this is better done from SRAM.
-                if not only_vector_product_consumers:
-                    # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
-                    new_tens = tens.clone_into_fast_storage(arch)
-                    dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
-                    dma_cmd.inputs = [tens]
-                    dma_cmd.outputs = [new_tens]
-                    dma_cmd.attrs["source"] = tens.mem_area
-                    dma_cmd.attrs["destination"] = new_tens.mem_area
-                    dma_cmd.run_on_npu = True
-                    new_tens.ops = [dma_cmd]
-                    op.inputs[idx] = new_tens
+                    # Vector products have no need for DMA; their tensors are only read once and can stay in flash.
+                    # Other operations re-read tensors; this is better done from SRAM.
+                    if not only_vector_product_consumers:
+                        # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
+                        new_tens = tens.clone_into_fast_storage(arch)
+                        dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
+                        dma_cmd.inputs = [tens]
+                        dma_cmd.outputs = [new_tens]
+                        dma_cmd.attrs["source"] = tens.mem_area
+                        dma_cmd.attrs["destination"] = new_tens.mem_area
+                        dma_cmd.run_on_npu = True
+                        new_tens.ops = [dma_cmd]
+                        op.inputs[idx] = new_tens
     return op
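
The new gating logic is easier to follow outside of patch context, so below is a stand-alone
restatement of the patched decision tree as a single predicate. This is an illustrative sketch
only: MemType, MemArea, TensorPurpose, the Op/Tensor/Arch dataclasses and the
BINARY_ELEMENTWISE_OPS set are simplified stand-ins invented for this example, not Vela's real
classes, and NPU block types are modelled as plain strings.

# needs_dma_sketch.py -- illustrative stand-ins, not ethos-u-vela's real classes
from dataclasses import dataclass, field
from enum import Enum, auto


class MemType(Enum):
    Permanent_NPU = auto()
    Scratch = auto()
    Scratch_fast = auto()


class MemArea(Enum):
    Sram = auto()
    Dram = auto()
    OffChipFlash = auto()


class TensorPurpose(Enum):
    Weights = auto()
    FeatureMap = auto()


@dataclass
class Op:
    type: str
    attrs: dict = field(default_factory=dict)


@dataclass
class Tensor:
    mem_type: MemType
    mem_area: MemArea
    purpose: TensorPurpose
    shape: list
    consumer_list: list = field(default_factory=list)

    def consumers(self):
        return self.consumer_list


@dataclass
class Arch:
    permanent_storage_mem_area: MemArea
    fast_storage_mem_area: MemArea


BINARY_ELEMENTWISE_OPS = {"AddAct", "SubAct", "MulAct"}  # illustrative subset


def needs_dma(tens: Tensor, op: Op, arch: Arch) -> bool:
    """Mirror of the patched condition tree: True if tens should be
    staged into fast storage via a DMA op before op reads it."""
    # Scratch tensors are already placed by the allocator; only
    # tensors in permanent storage are candidates.
    if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
        return False
    # Moving data only pays off when permanent and fast storage differ.
    if tens.mem_area not in (MemArea.Dram, MemArea.OffChipFlash):
        return False
    if arch.permanent_storage_mem_area == arch.fast_storage_mem_area:
        return False
    # Only weights, or non-scalar feature maps feeding binary
    # elementwise ops, are worth staging.
    if not (
        tens.purpose == TensorPurpose.Weights
        or (tens.purpose == TensorPurpose.FeatureMap and op.type in BINARY_ELEMENTWISE_OPS and tens.shape != [])
    ):
        return False
    # Vector products read a tensor only once, so flash is fine; any
    # other consumer re-reads it, which is better served from SRAM.
    return any(
        c is None or c.attrs.get("npu_block_type") != "VectorProduct"
        for c in tens.consumers()
    )


if __name__ == "__main__":
    arch = Arch(MemArea.OffChipFlash, MemArea.Sram)
    conv = Op("Conv2DBiasAct", {"npu_block_type": "ConvolutionMxN"})
    weights = Tensor(MemType.Permanent_NPU, MemArea.OffChipFlash,
                     TensorPurpose.Weights, [3, 3, 16, 16], [conv])
    print(needs_dma(weights, conv, arch))  # True: worth staging in SRAM

Under these assumptions the weight tensor qualifies, so the pass would clone it into fast
storage and splice a DMA op between the original tensor and its consumer, which is exactly
what the dma_cmd wiring in the hunk above does.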