aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/lut.py
diff options
context:
space:
mode:
authorJohan Alfven <johan.alfven@arm.com>2024-04-02 16:32:54 +0200
committerJohan Alfven <johan.alfven@arm.com>2024-04-03 19:43:44 +0200
commit55d90dd1f51e95e3b066ab2976b595107cc485c9 (patch)
tree5880ca8e021724367550134621581a8da3bbfbc7 /ethosu/vela/lut.py
parente4d2f218fbdba4aa58380e9dfc42688330a70512 (diff)
downloadethos-u-vela-55d90dd1f51e95e3b066ab2976b595107cc485c9.tar.gz
MLBEDSW-8873: MLCE: Update LUT index calculation
- A network containing several softmax operators caused an output diff.
- The problem was that the code that detects if the LUT is already in internal SRAM calculated everything correctly except for which LUT index to use.
- The code should use the slot_size and not the LUT size when calculating the index, which fixes this problem.
- Updated unit tests.

Change-Id: I07686651a883ccbba7c173e7191eb21f9ff15bf5
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/lut.py')
-rw-r--r--ethosu/vela/lut.py8
1 files changed, 4 insertions, 4 deletions
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py
index ab440e63..7b563b08 100644
--- a/ethosu/vela/lut.py
+++ b/ethosu/vela/lut.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2021, 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -83,7 +83,7 @@ class LUTState:
def get_lut_index(arch, lut_tensor):
# Returns the index in SHRAM where the given LUT is stored, a value between 0 and 7
- slot = (lut_tensor.address - arch.shram_lut_address) // lut_tensor.storage_size()
+ slot = (lut_tensor.address - arch.shram_lut_address) // arch.shram_lut_slot_size
assert 0 <= slot < 8
return slot
@@ -107,7 +107,6 @@ def optimize_high_level_cmd_stream(sg, arch):
# - Removes unnecessary DMA operations of LUT-s that are already present in SHRAM from sg's command stream
cmd_stream = [] # will contain existing command stream minus unneeded DMA operations
lut_state = LUTState()
- slot_size = 256
lut_start = arch.shram_lut_address
lut_end = lut_start + arch.shram_lut_size
for cmd in sg.high_level_command_stream:
@@ -131,9 +130,10 @@ def optimize_high_level_cmd_stream(sg, arch):
# Place the LUT in the last 2 blocks of SHRAM
# Alignment is always on the size of the LUT, 256 for 256-byte LUT, 1K for 1K LUT, etc
address = lut_state.find_best_address(lut_start, lut_end, lut_tens.storage_size())
+
lut_tens.equivalence_id = uuid.uuid4()
lut_tens.address = address
- cmd.ps.primary_op.activation.lut_index = (address - lut_start) // slot_size
+ cmd.ps.primary_op.activation.lut_index = (address - lut_start) // arch.shram_lut_slot_size
lut_state = lut_state.put(lut_tens)
cmd_stream.append(cmd)
sg.high_level_command_stream = cmd_stream