From 0b8268a0dac80aa22133ca83ed6912d3b565439a Mon Sep 17 00:00:00 2001
From: Louis Verhaard
Date: Wed, 5 Aug 2020 16:11:29 +0200
Subject: MLBEDSW-2688: Improved LUT support

- Support for more than one 256-byte LUT in SHRAM
- No DMA is performed for a LUT that is already located in SHRAM
- Added MemArea.Shram, used for LUT, to avoid false address collision
  asserts during SRAM tensor allocation
- Added read access to LUT in memory access calculation

Change-Id: If4d1eded5ed029d253f4f5efb2d80495fc3eac99
Signed-off-by: Louis Verhaard
---
 ethosu/vela/pass_packing.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'ethosu/vela/pass_packing.py')

diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 8e108dbf..7b69e35d 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -381,12 +381,18 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                 input_set.add(input_tens)
 
         ordered_input_list = []
+        # Keep LUT-s in a separate list and add as inputs at the end
+        # to avoid that they would accidentally be assigned as ifm or ifm2
+        lut_list = []
         input_refcounts = collections.defaultdict(int)
         for op in ops_list:
             for inp in op.inputs:
                 if inp in input_set:
                     if input_refcounts[inp] == 0:
-                        ordered_input_list.append(inp)
+                        if inp.purpose == TensorPurpose.LUT:
+                            lut_list.append(inp)
+                        else:
+                            ordered_input_list.append(inp)
                     input_refcounts[inp] += 1
 
         name = ops_list[0].name
@@ -416,6 +422,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
             ps.weight_tensor = ps.get_primary_op_ifm_weights()[1]
             ps.scale_tensor = ps.get_primary_op_ifm_weights_biases_ofm()[2]
             ps.lut_tensor = ps.get_primary_op_lut()
+            ps.inputs.extend(lut_list)
 
         for op in ps.ops:
             op.scheduled_pass = ps
-- 
cgit v1.2.1
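
The input-ordering change above can be illustrated with a small standalone sketch. This is not Vela code: Tensor and TensorPurpose here are simplified stand-ins for the classes in ethosu/vela/tensor.py, and order_pass_inputs is a hypothetical helper that mirrors only the loop shown in the diff. It shows how tensors with purpose LUT are collected in a separate list and appended after the regular inputs, so they are never mistaken for ifm or ifm2.

import collections
from enum import Enum


class TensorPurpose(Enum):
    # Simplified stand-in for ethosu.vela.tensor.TensorPurpose
    FeatureMap = 0
    Weights = 1
    LUT = 2


class Tensor:
    # Simplified stand-in carrying only the fields this sketch needs
    def __init__(self, name, purpose):
        self.name = name
        self.purpose = purpose


def order_pass_inputs(ops_input_lists, input_set):
    # Mirror of the loop in the diff: on first use a tensor goes to
    # ordered_input_list, except LUTs, which are collected separately
    # and appended at the end so they cannot be picked up as ifm/ifm2.
    ordered_input_list = []
    lut_list = []
    input_refcounts = collections.defaultdict(int)
    for inputs in ops_input_lists:
        for inp in inputs:
            if inp in input_set:
                if input_refcounts[inp] == 0:
                    if inp.purpose == TensorPurpose.LUT:
                        lut_list.append(inp)
                    else:
                        ordered_input_list.append(inp)
                input_refcounts[inp] += 1
    return ordered_input_list + lut_list


if __name__ == "__main__":
    lut = Tensor("lut", TensorPurpose.LUT)
    ifm = Tensor("ifm", TensorPurpose.FeatureMap)
    ordered = order_pass_inputs([[lut, ifm]], {lut, ifm})
    print([t.name for t in ordered])  # -> ['ifm', 'lut']

In the real pass, ps.inputs.extend(lut_list) then records the LUT as a pass input after the primary ifm/ifm2/weight tensors have been resolved, which is exactly what the comment added in the diff describes.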