diff options
author | Louis Verhaard <louis.verhaard@arm.com> | 2020-08-05 16:11:29 +0200 |
---|---|---|
committer | Louis Verhaard <louis.verhaard@arm.com> | 2020-08-17 15:10:21 +0200 |
commit | 0b8268a0dac80aa22133ca83ed6912d3b565439a (patch) | |
tree | 159fe485c156d6a3f3a1a65ab1b1a24ff68f2849 /ethosu/vela/test | |
parent | 458a208c44f70a9848f1e8e2e91f28ce3641c48f (diff) | |
download | ethos-u-vela-0b8268a0dac80aa22133ca83ed6912d3b565439a.tar.gz |
MLBEDSW-2688: Improved LUT support
- Support for more than one 256-byte LUT in SHRAM
- No DMA is performed for a LUT that is already located in SHRAM
- Added MemArea.Shram, used for LUT, to avoid false address collision
  asserts during SRAM tensor allocation
- Added read access to LUT in memory access calculation
Change-Id: If4d1eded5ed029d253f4f5efb2d80495fc3eac99
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Diffstat (limited to 'ethosu/vela/test')
-rw-r--r-- | ethosu/vela/test/test_live_range.py | 1 | ||||
-rw-r--r-- | ethosu/vela/test/test_lut.py | 180 | ||||
-rw-r--r-- | ethosu/vela/test/test_model_reader.py | 1 | ||||
-rw-r--r-- | ethosu/vela/test/test_tflite_reader.py | 1 | ||||
-rw-r--r-- | ethosu/vela/test/testutil.py | 70 |
5 files changed, 253 insertions, 0 deletions
diff --git a/ethosu/vela/test/test_live_range.py b/ethosu/vela/test/test_live_range.py index 395d0f3d..d087dd99 100644 --- a/ethosu/vela/test/test_live_range.py +++ b/ethosu/vela/test/test_live_range.py @@ -18,6 +18,7 @@ from unittest.mock import MagicMock import pytest + from ethosu.vela.live_range import LiveRange diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py new file mode 100644 index 00000000..3b7f57be --- /dev/null +++ b/ethosu/vela/test/test_lut.py @@ -0,0 +1,180 @@ +# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Description: +# Unit tests for LUT support +import numpy as np + +from ethosu.vela import insert_dma +from ethosu.vela import lut +from ethosu.vela import mark_tensors +from ethosu.vela import pass_packing +from ethosu.vela.data_type import DataType +from ethosu.vela.high_level_command_stream import DMA +from ethosu.vela.nn_graph import Graph +from ethosu.vela.rewrite_graph import verify_graph_health +from ethosu.vela.tensor import create_const_tensor +from ethosu.vela.tensor import TensorPurpose +from ethosu.vela.test import testutil + + +def set_256_lut(op, key): + values = list(range(256)) + lut_tensor = create_const_tensor( + op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, np.uint8, TensorPurpose.LUT + ) + lut_tensor.equivalence_id = lut.create_equivalence_id(key) + op.set_activation_lut(lut_tensor) + + +def set_1K_lut(op, key): + values = list(range(256)) + lut_tensor = create_const_tensor( + op.name + "_lut", [1, 1, 1, 256], DataType.int32, values, np.uint32, TensorPurpose.LUT + ) + lut_tensor.equivalence_id = lut.create_equivalence_id(key) + op.set_activation_lut(lut_tensor) + + +def set_2K_lut(op, key): + values = list(range(512)) + lut_tensor = create_const_tensor( + op.name + "_lut", [1, 1, 1, 512], DataType.int32, values, np.uint32, TensorPurpose.LUT + ) + lut_tensor.equivalence_id = lut.create_equivalence_id(key) + op.set_activation_lut(lut_tensor) + + +def process(arch, op_list): + # Returns subgraph with given operations + nng = Graph() + sg = testutil.create_subgraph(op_list) + nng.subgraphs.append(sg) + assert verify_graph_health(nng) + nng = mark_tensors.mark_tensor_purpose(nng, arch, False) + assert verify_graph_health(nng) + nng = insert_dma.insert_dma_commands(nng, arch, False) + assert verify_graph_health(nng) + pass_packing.pack_into_passes(nng, arch, False) + assert verify_graph_health(nng) + # Create a DMA instruction for every op + cmd_list = [] + for ps in sg.passes: + for intermediate in ps.intermediates: + if 
intermediate.needs_dma(): + cmd_list.append(DMA(ps, intermediate.get_dma_src_tensor(), intermediate, None)) + sg.high_level_command_stream = cmd_list + return sg + + +def test_optimize_high_level_cmd_stream_2K(): + # Tests lut.optimize_high_level_cmd_stream, blending 256 byte and 2K luts + arch = testutil.create_arch() + shape = [1, 1, 1, 1] + # u8 LUT op, should lead to DMA + op0 = testutil.create_elemwise_op("AddAct", "op0", shape, shape, shape) + set_256_lut(op0, "lut0") + # u8 LUT op, should lead to DMA + op1 = testutil.create_elemwise_op("AddAct", "op1", shape, shape, shape) + set_256_lut(op1, "lut1") + # u8 LUT op with different LUT, should lead to DMA + op2 = testutil.create_elemwise_op("AddAct", "op2", shape, shape, shape) + set_256_lut(op2, "lut2") + # u8 LUT op with same LUT as in op1, should not lead to DMA + op3 = testutil.create_elemwise_op("AddAct", "op3", shape, shape, shape) + set_256_lut(op3, "lut1") + # u8 LUT op with same LUT as in op2, should not lead to DMA + op4 = testutil.create_elemwise_op("AddAct", "op4", shape, shape, shape) + set_256_lut(op4, "lut2") + # 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM + op5_2K = testutil.create_elemwise_op("AddAct", "op5", shape, shape, shape) + set_2K_lut(op5_2K, "lut5") + # Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM + op6_2K = testutil.create_elemwise_op("AddAct", "op6", shape, shape, shape) + set_2K_lut(op6_2K, "lut6") + # u8 LUT op with same LUT as in op1, should lead to DMA + op7 = testutil.create_elemwise_op("AddAct", "op7", shape, shape, shape) + set_256_lut(op7, "lut1") + + op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7] + sg = process(arch, op_list) + orig_cmd_list = sg.high_level_command_stream + sg.high_level_command_stream = orig_cmd_list + lut.optimize_high_level_cmd_stream(sg, arch) + cmd_list = sg.high_level_command_stream + # Check that only the needed DMA commands are left + expected_dma_ops = [op0, op1, op2, 
op5_2K, op6_2K, op7] + for (cmd, op) in zip(cmd_list, expected_dma_ops): + assert cmd.in_tensor == op.activation_lut + # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses + assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address + assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address + # Check that lut1 in op1 and op3 have same address + assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address + # Check that lut2 in op2 and op4 have same address + assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address + # Check that lut-s for 16 bit (op5 and op6) are stored on same address + assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address + + +def test_optimize_high_level_cmd_stream_1K(): + # Tests lut.optimize_high_level_cmd_stream, blending 256 and 1K luts + arch = testutil.create_arch() + shape = [1, 1, 1, 1] + # u8 LUT op, should lead to DMA + op0 = testutil.create_elemwise_op("AddAct", "op0", shape, shape, shape) + set_256_lut(op0, "lut0") + # u8 LUT op, should lead to DMA + op1 = testutil.create_elemwise_op("AddAct", "op1", shape, shape, shape) + set_256_lut(op1, "lut1") + # 1K LUT op with different LUT, should lead to DMA + op2_1K = testutil.create_elemwise_op("AddAct", "op2", shape, shape, shape) + set_1K_lut(op2_1K, "lut2") + # u8 LUT op with same LUT as in op1, should not lead to DMA + op3 = testutil.create_elemwise_op("AddAct", "op3", shape, shape, shape) + set_256_lut(op3, "lut1") + # 1K LUT op with same LUT as in op2, should not lead to DMA + op4_1K = testutil.create_elemwise_op("AddAct", "op4", shape, shape, shape) + set_1K_lut(op4_1K, "lut2") + # 1K LUT op, should lead to DMA, and will overwrite lut2 + op5_2K = testutil.create_elemwise_op("AddAct", "op5", shape, shape, shape) + set_1K_lut(op5_2K, "lut5") + # u8 LUT 
op, lut0 should still be present, should not lead to DMA + op6 = testutil.create_elemwise_op("AddAct", "op6", shape, shape, shape) + set_256_lut(op6, "lut0") + # 1K LUT op with same LUT as in op2, should lead to DMA + op7 = testutil.create_elemwise_op("AddAct", "op7", shape, shape, shape) + set_1K_lut(op7, "lut2") + + op_list = [op0, op1, op2_1K, op3, op4_1K, op5_2K, op6, op7] + sg = process(arch, op_list) + orig_cmd_list = sg.high_level_command_stream + sg.high_level_command_stream = orig_cmd_list + lut.optimize_high_level_cmd_stream(sg, arch) + cmd_list = sg.high_level_command_stream + # Check that only the needed DMA commands are left + expected_dma_ops = [op0, op1, op2_1K, op5_2K, op7] + for (cmd, op) in zip(cmd_list, expected_dma_ops): + assert cmd.in_tensor == op.activation_lut + # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses + assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address + assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address + assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address + # Check that lut1 in op1 and op3 have same address + assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address + # Check that lut2 in op2 and op4 and op7 have same address + assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address + assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address diff --git a/ethosu/vela/test/test_model_reader.py b/ethosu/vela/test/test_model_reader.py index 23e7e90b..bd7ca377 100644 --- a/ethosu/vela/test/test_model_reader.py +++ b/ethosu/vela/test/test_model_reader.py @@ -16,6 +16,7 @@ # Description: # Unit tests for model_reader. 
import pytest + from ethosu.vela import model_reader from ethosu.vela.errors import InputFileError diff --git a/ethosu/vela/test/test_tflite_reader.py b/ethosu/vela/test/test_tflite_reader.py index 898e3840..1ba07423 100644 --- a/ethosu/vela/test/test_tflite_reader.py +++ b/ethosu/vela/test/test_tflite_reader.py @@ -16,6 +16,7 @@ # Description: # Contains unit tests for tflite_reader import pytest + from ethosu.vela.tflite_reader import TFLiteSubgraph diff --git a/ethosu/vela/test/testutil.py b/ethosu/vela/test/testutil.py new file mode 100644 index 00000000..116afa40 --- /dev/null +++ b/ethosu/vela/test/testutil.py @@ -0,0 +1,70 @@ +# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Description: +# Utilities used in vela unit tests +import numpy as np + +from ethosu.vela import architecture_features +from ethosu.vela.data_type import DataType +from ethosu.vela.nn_graph import Subgraph +from ethosu.vela.operation import NpuBlockType +from ethosu.vela.operation import Operation +from ethosu.vela.tensor import create_const_tensor +from ethosu.vela.tensor import MemArea +from ethosu.vela.tensor import Tensor + + +def create_arch(): + return architecture_features.ArchitectureFeatures( + vela_config=None, + system_config=None, + accelerator_config=architecture_features.Accelerator.Ethos_U55_128.value, + permanent_storage=MemArea.OnChipFlash, + override_block_config=None, + block_config_limit=None, + global_memory_clock_scale=1.0, + max_blockdep=0, + softmax_support=True, + ) + + +def create_elemwise_op(type, name, ifm_shape, ifm2_shape, ofm_shape, datatype=DataType.uint8): + # Creates elementwise operation with constant IFM/IFM2 + if datatype.size_in_bytes() == 1: + np_type = np.uint8 + elif datatype.size_in_bytes() == 2: + np_type = np.int16 + else: + np_type = np.int32 + op = Operation(type, name) + op.add_input_tensor(create_const_tensor(name + "_ifm", ifm_shape, datatype, np.zeros(ifm_shape), np_type)) + op.add_input_tensor(create_const_tensor(name + "_ifm2", ifm2_shape, datatype, np.zeros(ifm2_shape), np_type)) + ofm = Tensor(ofm_shape, datatype, name + "_ofm") + op.set_output_tensor(ofm) + op.attrs["npu_block_type"] = NpuBlockType.ElementWise + return op + + +def create_subgraph(op_list): + # Creates subgraph using the given list of operations + sg = Subgraph() + all_inputs = set(tens for op in op_list for tens in op.inputs) + # Reversing, so that the resulting subgraph has same order as op_list + for op in op_list[::-1]: + for tens in op.outputs: + if tens not in all_inputs and tens not in sg.output_tensors: + sg.output_tensors.append(tens) + return sg |