diff options
author | Johan Alfven <johan.alfven@arm.com> | 2024-04-02 16:32:54 +0200 |
---|---|---|
committer | Johan Alfven <johan.alfven@arm.com> | 2024-04-03 19:43:44 +0200 |
commit | 55d90dd1f51e95e3b066ab2976b595107cc485c9 (patch) | |
tree | 5880ca8e021724367550134621581a8da3bbfbc7 | |
parent | e4d2f218fbdba4aa58380e9dfc42688330a70512 (diff) | |
download | ethos-u-vela-55d90dd1f51e95e3b066ab2976b595107cc485c9.tar.gz |
MLBEDSW-8873: MLCE: Update LUT index calculation
- A network containing several softmax operators caused an
output diff
- The problem was that the code that detects if the LUT is
already in internal SRAM calculated everything correctly except
for which LUT index to use.
- The code should use the slot_size and not the LUT size when
calculating the index which fixes this problem.
- Updated unit tests
Change-Id: I07686651a883ccbba7c173e7191eb21f9ff15bf5
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
-rw-r--r-- | ethosu/vela/architecture_features.py | 1 | ||||
-rw-r--r-- | ethosu/vela/lut.py | 8 | ||||
-rw-r--r-- | ethosu/vela/test/test_lut.py | 14 |
3 files changed, 18 insertions, 5 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py index 5c485bb..44b28c6 100644 --- a/ethosu/vela/architecture_features.py +++ b/ethosu/vela/architecture_features.py @@ -397,6 +397,7 @@ class ArchitectureFeatures: self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32) self.shram_lut_size = 2048 + self.shram_lut_slot_size = 256 # SHRAM base address of the activation lookup table self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True) diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py index ab440e6..7b563b0 100644 --- a/ethosu/vela/lut.py +++ b/ethosu/vela/lut.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com> +# SPDX-FileCopyrightText: Copyright 2020-2021, 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com> # # SPDX-License-Identifier: Apache-2.0 # @@ -83,7 +83,7 @@ class LUTState: def get_lut_index(arch, lut_tensor): # Returns the index in SHRAM where the given LUT is stored, a value between 0 and 8 - slot = (lut_tensor.address - arch.shram_lut_address) // lut_tensor.storage_size() + slot = (lut_tensor.address - arch.shram_lut_address) // arch.shram_lut_slot_size assert 0 <= slot < 8 return slot @@ -107,7 +107,6 @@ def optimize_high_level_cmd_stream(sg, arch): # - Removes unnecessary DMA operations of LUT-s that are already present in SHRAM from sg's command stream cmd_stream = [] # will contain existing command stream minus unneeded DMA operations lut_state = LUTState() - slot_size = 256 lut_start = arch.shram_lut_address lut_end = lut_start + arch.shram_lut_size for cmd in sg.high_level_command_stream: @@ -131,9 +130,10 @@ def optimize_high_level_cmd_stream(sg, arch): # Place the LUT in the last 2 blocks of SHRAM # Alignment is always on the size of the LUT, 256 for 256-byte LUT, 1K for 1K 
LUT, etc address = lut_state.find_best_address(lut_start, lut_end, lut_tens.storage_size()) + lut_tens.equivalence_id = uuid.uuid4() lut_tens.address = address - cmd.ps.primary_op.activation.lut_index = (address - lut_start) // slot_size + cmd.ps.primary_op.activation.lut_index = (address - lut_start) // arch.shram_lut_slot_size lut_state = lut_state.put(lut_tens) cmd_stream.append(cmd) sg.high_level_command_stream = cmd_stream diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py index e52b489..70b7147 100644 --- a/ethosu/vela/test/test_lut.py +++ b/ethosu/vela/test/test_lut.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com> +# SPDX-FileCopyrightText: Copyright 2020-2021, 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com> # # SPDX-License-Identifier: Apache-2.0 # @@ -131,14 +131,20 @@ def test_optimize_high_level_cmd_stream_2K(): assert cmd.in_tensor == op.activation_lut.src_tensor # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address + assert orig_cmd_list[0].ps.primary_op.activation.lut_index != orig_cmd_list[1].ps.primary_op.activation.lut_index assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[0].ps.primary_op.activation.lut_index != orig_cmd_list[2].ps.primary_op.activation.lut_index assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[1].ps.primary_op.activation.lut_index != orig_cmd_list[2].ps.primary_op.activation.lut_index # Check that lut1 in op1 and op3 have same address assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address + assert orig_cmd_list[1].ps.primary_op.activation.lut_index == orig_cmd_list[3].ps.primary_op.activation.lut_index # Check that lut2 in op2 and op4 have same address 
assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address + assert orig_cmd_list[2].ps.primary_op.activation.lut_index == orig_cmd_list[4].ps.primary_op.activation.lut_index # Check that lut-s for 16 bit (op5 and op6) are stored on same address assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address + assert orig_cmd_list[5].ps.primary_op.activation.lut_index == orig_cmd_list[6].ps.primary_op.activation.lut_index def test_optimize_high_level_cmd_stream_1K(): @@ -186,10 +192,16 @@ def test_optimize_high_level_cmd_stream_1K(): assert cmd.in_tensor == op.activation_lut.src_tensor # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address + assert orig_cmd_list[0].ps.primary_op.activation.lut_index != orig_cmd_list[1].ps.primary_op.activation.lut_index assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[0].ps.primary_op.activation.lut_index != orig_cmd_list[2].ps.primary_op.activation.lut_index assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[1].ps.primary_op.activation.lut_index != orig_cmd_list[2].ps.primary_op.activation.lut_index # Check that lut1 in op1 and op3 have same address assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address + assert orig_cmd_list[1].ps.primary_op.activation.lut_index == orig_cmd_list[3].ps.primary_op.activation.lut_index # Check that lut2 in op2 and op4 and op7 have same address assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address + assert orig_cmd_list[2].ps.primary_op.activation.lut_index == orig_cmd_list[4].ps.primary_op.activation.lut_index assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address + assert orig_cmd_list[2].ps.primary_op.activation.lut_index == 
orig_cmd_list[7].ps.primary_op.activation.lut_index |