MLBEDSW-2688: LUT DMA may require kernel wait

LUT-related updates specific to 16K SHRAM:
- prevent a LUT DMA transfer from overwriting the accumulator SHRAM of an ongoing operation
- do not use the last 2K of SHRAM as accumulator during LUT operations

Change-Id: I17066e0410c6f07b125ed245002d7b19269a7a8a
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
author: Louis Verhaard <louis.verhaard@arm.com> 2020-08-21 14:06:25 +0200
committer: Louis Verhaard <louis.verhaard@arm.com> 2020-08-26 08:18:27 +0200
commit: 814cfbb8124ba0b3828db2bb12d9342ae9c39f19 (patch)
tree: 519f7f41091efa944f6c4e3eb732892c56da40e1
parent: 7579c75d870c25ee075e46a110b6b89cf266db64 (diff)
download: ethos-u-vela-814cfbb8124ba0b3828db2bb12d9342ae9c39f19.tar.gz
2 files changed, 16 insertions, 1 deletion
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 95af1ccb..b8a19f50 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -243,6 +243,10 @@ class NpuStripe(Command):
                     MemoryRangeSet(tens.mem_area, tens.address, tens.address + tens.storage_size()),
                     AccessDirection.Read,
                 )
+        # Add write access to SHRAM, needed when LUTs can overwrite accumulator banks
+        res.add(
+            self.ps.shared_buffer.get_shram_memory_access_range(), AccessDirection.Write,
+        )
         return res
 
     def is_npu_pass_command(self):
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 053377c4..fdcbe94a 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -25,6 +25,8 @@ from .architecture_features import SHRAMElements
 from .errors import VelaError
 from .ethos_u55_regs.ethos_u55_regs import resampling_mode
 from .operation import NpuBlockType
+from .range_set import MemoryRangeSet
+from .tensor import MemArea
 
 
 class SharedBufferAllocation:
@@ -40,6 +42,7 @@ class SharedBufferAllocation:
         dilation = (1, 1, 1, 1)
         self.kernel = Kernel(1, 1)
         is_elementwise = ps.npu_block_type == NpuBlockType.ElementWise
+        self.uses_lut = False
 
         if ps.primary_op:
             strides = ps.primary_op.attrs.get("strides", strides)
@@ -55,6 +58,7 @@ class SharedBufferAllocation:
                 k_w = ps.primary_op.attrs.get("filter_width", 1)
 
             self.kernel = Kernel(k_w, k_h, strides[2], strides[1], dilation[2], dilation[1])
+            self.uses_lut = ps.primary_op.activation_lut is not None
 
         self.is_equal_depth_op = is_elementwise or ps.npu_block_type in (
             NpuBlockType.ConvolutionDepthWise,
@@ -102,7 +106,7 @@ class SharedBufferAllocation:
 
         # Accumulator area is measured from the end of the buffer
         self.bank_locations[SharedBufferArea.Accumulators] = (
-            self.arch.shram_total_banks - self.banks_required[SharedBufferArea.Accumulators]
+            self.arch.available_shram_banks(self.uses_lut) - self.banks_required[SharedBufferArea.Accumulators]
         )
         ifm_end = self.bank_locations[SharedBufferArea.IFM] + self.banks_required[SharedBufferArea.IFM]
         return ifm_end <= self.bank_locations[SharedBufferArea.Accumulators]
@@ -156,6 +160,13 @@ class SharedBufferAllocation:
 
         return True
 
+    def get_shram_memory_access_range(self):
+        # Returns the SHRAM memory access range used by this shared buffer,
+        # excluding access to LUT
+        return MemoryRangeSet(
+            MemArea.Shram, 0, self.arch.available_shram_banks(self.uses_lut) * self.arch.shram_bank_size
+        )
+
 
 def shared_buffer_allocation_for_pass_and_block_config(arch, ps, block_config):
     alloc = SharedBufferAllocation(arch, ps)
author	Louis Verhaard <louis.verhaard@arm.com>	2020-08-21 14:06:25 +0200
committer	Louis Verhaard <louis.verhaard@arm.com>	2020-08-26 08:18:27 +0200
commit	814cfbb8124ba0b3828db2bb12d9342ae9c39f19 (patch)
tree	519f7f41091efa944f6c4e3eb732892c56da40e1
parent	7579c75d870c25ee075e46a110b6b89cf266db64 (diff)
download	ethos-u-vela-814cfbb8124ba0b3828db2bb12d9342ae9c39f19.tar.gz