From fa34c6f4eebec25814c1e620a85721416f4d4ce3 Mon Sep 17 00:00:00 2001
From: Patrik Gustavsson
Date: Mon, 17 Aug 2020 12:43:22 +0200
Subject: MLBEDSW-2636 Prevent DMA of weight to Sram in some cases

DMA transfer of weights is prevented when the weight
double buffer is assumed to not fit Sram.

Signed-off-by: Patrik Gustavsson
Change-Id: I9809dca1d4b335436e1a0b81093640361ada255e
---
 ethosu/vela/insert_dma.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'ethosu/vela')

diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index 6cd2202c..c2382863 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -26,6 +26,29 @@ from .tensor import TensorPurpose
 binary_elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum"))
 
 
+def weights_fit_sram(arch, tens):
+    if tens.purpose != TensorPurpose.Weights:
+        return True
+
+    min_weight_size = 0
+    if len(tens.shape) == 4:
+        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
+    elif len(tens.shape) == 2:
+        min_weight_size = tens.shape[0] * arch.OFMSplitDepth
+
+    w_compression = 1  # TODO worst compression ratio currently assumed
+
+    # Need to be fit into Sram, as a double buffer
+    if (w_compression * min_weight_size * 2) > arch.sram_size:
+        print(
+            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
+                tens.name, (w_compression * min_weight_size * 2)
+            )
+        )
+        return False
+    return True
+
+
 def insert_dma_cmd(op, arch):
     if op.type == "DMA" or not op.run_on_npu:
         return op
@@ -51,7 +74,9 @@ def insert_dma_cmd(op, arch):
                     # Tensor products has no need for DMA, tensors are only read once and can be in flash.
                     # Other operations re-reads tensors, this is better done from SRAM.
                     # LUTs must be placed in the last 2 blocks of SHRAM.
-                    if not only_vector_product_consumers or tens.purpose == TensorPurpose.LUT:
+                    if (
+                        not only_vector_product_consumers and weights_fit_sram(arch, tens)
+                    ) or tens.purpose == TensorPurpose.LUT:
                         # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
                         new_tens = tens.clone_into_fast_storage(arch)
                         dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
--
cgit v1.2.1
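
The check added above boils down to simple arithmetic: take the smallest weight slice the scheduler can work on (one OFM depth split), assume the worst case of no compression, double it for double buffering, and compare the result against the SRAM size. The standalone sketch below replays that arithmetic outside the compiler with hypothetical numbers; the OFMSplitDepth of 16, the 384 KiB SRAM size, and the HWIO-style weight layout are assumptions made for illustration only, not values taken from the patch.

    # Standalone sketch of the weights_fit_sram() double-buffer check.
    # OFM_SPLIT_DEPTH, SRAM_SIZE and the weight shape layout are assumed example values.

    OFM_SPLIT_DEPTH = 16    # assumed stand-in for arch.OFMSplitDepth
    SRAM_SIZE = 384 * 1024  # assumed stand-in for arch.sram_size, in bytes


    def weights_fit_sram_sketch(weight_shape, ofm_split_depth=OFM_SPLIT_DEPTH, sram_size=SRAM_SIZE):
        """Return True if a double buffer of one weight depth-slice is assumed to fit in SRAM."""
        min_weight_size = 0
        if len(weight_shape) == 4:
            # Convolution-style weights: kernel_h * kernel_w * ifm_depth, for one OFM depth split
            min_weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * ofm_split_depth
        elif len(weight_shape) == 2:
            # Fully-connected-style weights: ifm_depth, for one OFM depth split
            min_weight_size = weight_shape[0] * ofm_split_depth

        w_compression = 1  # worst case: no compression assumed, mirroring the TODO in the patch
        # Two buffers must fit so the next slice can be DMAed in while the current one is consumed
        return (w_compression * min_weight_size * 2) <= sram_size


    # 3x3 kernel over 512 input channels: 3 * 3 * 512 * 16 * 2 = 147,456 bytes -> fits 384 KiB
    print(weights_fit_sram_sketch([3, 3, 512, 1000]))   # True
    # Fully connected layer with 100,000 inputs: 100,000 * 16 * 2 = 3,200,000 bytes -> does not fit
    print(weights_fit_sram_sketch([100000, 1000]))       # False

When the check fails, the weights are simply left in permanent storage (Dram or off-chip flash) and no DMA command is inserted, so the operation reads them from there instead of from a double buffer in SRAM.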