author     Patrik Gustavsson <patrik.gustavsson@arm.com>    2020-08-17 12:43:22 +0200
committer  tim.hall <tim.hall@arm.com>                      2020-08-19 11:28:03 +0000
commit     fa34c6f4eebec25814c1e620a85721416f4d4ce3 (patch)
tree       05115c9920ccfc36077aa66f77e3312bfd69985b
parent     eebb1c2190aa48dfbfbbabd3992cbad197f33c34 (diff)
download   ethos-u-vela-fa34c6f4eebec25814c1e620a85721416f4d4ce3.tar.gz
MLBEDSW-2636 Prevent DMA of weights to SRAM in some cases

DMA transfer of weights is prevented when the weight double buffer is
assumed not to fit in SRAM.

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I9809dca1d4b335436e1a0b81093640361ada255e
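Minimal standalone sketch of the estimate behind this change: one OFMSplitDepth-deep slice of the weight tensor, assumed uncompressed, has to fit in SRAM twice over (double buffered). The shapes, split depth and SRAM size below are hypothetical placeholders, not values taken from a real accelerator configuration.

# Hedged sketch of the double-buffer fit check; all numbers below are hypothetical.

def min_weight_footprint(shape, ofm_split_depth):
    # Worst-case size of one OFM-depth slice of the weight tensor,
    # mirroring the 4-D (convolution) and 2-D (fully connected) cases in the patch.
    if len(shape) == 4:
        return shape[0] * shape[1] * shape[2] * ofm_split_depth
    if len(shape) == 2:
        return shape[0] * ofm_split_depth
    return 0


def fits_as_double_buffer(shape, ofm_split_depth, sram_size, compression=1):
    # compression=1 matches the patch's worst-case (uncompressed) assumption.
    return compression * min_weight_footprint(shape, ofm_split_depth) * 2 <= sram_size


# Hypothetical 384 KB SRAM and an OFM split depth of 16.
SRAM_SIZE = 384 * 1024
OFM_SPLIT_DEPTH = 16
print(fits_as_double_buffer((3, 3, 32, 64), OFM_SPLIT_DEPTH, SRAM_SIZE))      # True  (9216 <= 393216)
print(fits_as_double_buffer((5, 5, 1024, 1024), OFM_SPLIT_DEPTH, SRAM_SIZE))  # False (819200 > 393216)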
-rw-r--r--  ethosu/vela/insert_dma.py  27
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index 6cd2202c..c2382863 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -26,6 +26,29 @@ from .tensor import TensorPurpose
binary_elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum"))
+def weights_fit_sram(arch, tens):
+    if tens.purpose != TensorPurpose.Weights:
+        return True
+
+    min_weight_size = 0
+    if len(tens.shape) == 4:
+        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
+    elif len(tens.shape) == 2:
+        min_weight_size = tens.shape[0] * arch.OFMSplitDepth
+
+    w_compression = 1  # TODO worst compression ratio currently assumed
+
+    # Needs to fit into SRAM as a double buffer
+    if (w_compression * min_weight_size * 2) > arch.sram_size:
+        print(
+            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
+                tens.name, (w_compression * min_weight_size * 2)
+            )
+        )
+        return False
+    return True
+
+
def insert_dma_cmd(op, arch):
    if op.type == "DMA" or not op.run_on_npu:
        return op
@@ -51,7 +74,9 @@ def insert_dma_cmd(op, arch):
# Tensor products have no need for DMA; tensors are only read once and can stay in flash.
# Other operations re-read tensors; this is better done from SRAM.
# LUTs must be placed in the last 2 blocks of SHRAM.
- if not only_vector_product_consumers or tens.purpose == TensorPurpose.LUT:
+ if (
+     not only_vector_product_consumers and weights_fit_sram(arch, tens)
+ ) or tens.purpose == TensorPurpose.LUT:
    # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
    new_tens = tens.clone_into_fast_storage(arch)
    dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
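For reference, a standalone illustration of how the reworked condition gates DMA insertion, with plain booleans standing in for the real graph objects (hypothetical helper, not part of the patch):

def should_insert_dma(only_vector_product_consumers, weights_fit, is_lut):
    # Mirrors the new condition:
    # (not only_vector_product_consumers and weights_fit_sram(arch, tens))
    #     or tens.purpose == TensorPurpose.LUT
    return (not only_vector_product_consumers and weights_fit) or is_lut


# Weights re-read by their consumers and the double buffer fits: DMA to SRAM.
assert should_insert_dma(False, True, False)
# Same consumers, but the double buffer would overflow SRAM: stay in flash.
assert not should_insert_dma(False, False, False)
# LUTs are always moved, since they must sit in the last 2 blocks of SHRAM.
assert should_insert_dma(True, False, True)

The LUT path is unchanged; only weight tensors whose double buffer would exceed arch.sram_size are now left in their original location instead of being DMAed.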