# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Unit tests for LUT support
import random

import numpy as np

from ethosu.vela import insert_dma
from ethosu.vela import lut
from ethosu.vela import mark_tensors
from ethosu.vela import pass_packing
from ethosu.vela.data_type import DataType
from ethosu.vela.high_level_command_stream import DMA
from ethosu.vela.nn_graph import Graph
from ethosu.vela.operation import Op
from ethosu.vela.rewrite_graph import verify_graph_health
from ethosu.vela.tensor import create_const_tensor
from ethosu.vela.tensor import TensorPurpose
from ethosu.vela.test import testutil


def set_256_lut(op, key):
    random.seed(key)
    values = random.choices(range(256), k=256)
    lut_tensor = create_const_tensor(
        op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, np.uint8, TensorPurpose.LUT
    )
    op.set_activation_lut(lut_tensor)


def set_1K_lut(op, key):
    random.seed(key)
    values = random.choices(range(256), k=256)
    lut_tensor = create_const_tensor(
        op.name + "_lut", [1, 1, 1, 256], DataType.int32, values, np.uint32, TensorPurpose.LUT
    )
    op.set_activation_lut(lut_tensor)


def set_2K_lut(op, key):
    random.seed(key)
    values = random.choices(range(512), k=512)
    lut_tensor = create_const_tensor(
        op.name + "_lut", [1, 1, 1, 512], DataType.int32, values, np.uint32, TensorPurpose.LUT
    )
    op.set_activation_lut(lut_tensor)


def process(arch, op_list):
    # Returns subgraph with given operations
    nng = Graph()
    sg = testutil.create_subgraph(op_list)
    nng.subgraphs.append(sg)
    assert verify_graph_health(nng)
    nng = mark_tensors.mark_tensor_purpose(nng, arch, False)
    assert verify_graph_health(nng)
    nng = insert_dma.insert_dma_commands(nng, arch, False)
    assert verify_graph_health(nng)
    pass_packing.pack_into_passes(nng, arch, False)
    assert verify_graph_health(nng)
    # Create a DMA instruction for every op
    cmd_list = []
    for ps in sg.passes:
        for intermediate in ps.intermediates:
            if intermediate.needs_dma():
                cmd_list.append(DMA(ps, intermediate.get_dma_src_tensor(), intermediate, None))
    sg.high_level_command_stream = cmd_list
    return sg


def filter_lut_cmds(cmd_list):
    lut_cmd_list = []
    for cmd in cmd_list:
        if "lut" in cmd.in_tensor.name:
            lut_cmd_list.append(cmd)
    return lut_cmd_list
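

# Note: the helpers above seed `random` with the given key, so two ops that are
# given the same key receive identical table values; the tests below rely on
# this to model LUT sharing. The entry count and dtype encode the SHRAM
# footprint: 256 x int8 = 256 bytes, 256 x int32 = 1K, 512 x int32 = 2K.
# A minimal, vela-free sanity check of the keyed determinism (a hypothetical
# helper test added for illustration, not part of the original suite):
def test_lut_keys_are_deterministic():
    random.seed("lut1")
    first = random.choices(range(256), k=256)
    random.seed("lut1")
    second = random.choices(range(256), k=256)
    # Same seed, same sequence: ops sharing a key share table values
    assert first == second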


def test_optimize_high_level_cmd_stream_2K():
    # Tests lut.optimize_high_level_cmd_stream, blending 256 byte and 2K luts
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0")
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1")
    # u8 LUT op with different LUT, should lead to DMA
    op2 = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_256_lut(op2, "lut2")
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1")
    # u8 LUT op with same LUT as in op2, should not lead to DMA
    op4 = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_256_lut(op4, "lut2")
    # 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM
    op5_2K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_2K_lut(op5_2K, "lut5")
    # Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM
    op6_2K = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_2K_lut(op6_2K, "lut6")
    # u8 LUT op with same LUT as in op1, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_256_lut(op7, "lut1")

    op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7]
    sg = process(arch, op_list)
    orig_cmd_list = sg.high_level_command_stream
    sg.high_level_command_stream = orig_cmd_list
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = sg.high_level_command_stream
    # Check that only the needed DMA commands are left
    expected_dma_ops = [op0, op1, op2, op5_2K, op6_2K, op7]
    cmd_list = filter_lut_cmds(cmd_list)
    orig_cmd_list = filter_lut_cmds(orig_cmd_list)
    for (cmd, op) in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored at different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have the same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2 and op4 have the same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    # Check that the 2K luts for 16 bit (op5 and op6) are stored at the same address
    assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address
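

# The LUT area in SHRAM is a shared resource: per the expectations encoded in
# the test above, a 2K table fills it completely, so DMA:ing one in evicts
# every previously loaded table. Smaller tables can coexist at different
# offsets, which is what the 1K test below exercises: loading a 1K table only
# evicts what it overlaps, so a 256 byte table (lut0) can still be resident
# after a later 1K load replaces another 1K table (lut5 replacing lut2).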


def test_optimize_high_level_cmd_stream_1K():
    # Tests lut.optimize_high_level_cmd_stream, blending 256 byte and 1K luts
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0")
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1")
    # 1K LUT op with different LUT, should lead to DMA
    op2_1K = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_1K_lut(op2_1K, "lut2")
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1")
    # 1K LUT op with same LUT as in op2, should not lead to DMA
    op4_1K = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_1K_lut(op4_1K, "lut2")
    # 1K LUT op, should lead to DMA, and will overwrite lut2
    op5_1K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_1K_lut(op5_1K, "lut5")
    # u8 LUT op, lut0 should still be present, should not lead to DMA
    op6 = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_256_lut(op6, "lut0")
    # 1K LUT op with same LUT as in op2, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_1K_lut(op7, "lut2")

    op_list = [op0, op1, op2_1K, op3, op4_1K, op5_1K, op6, op7]
    sg = process(arch, op_list)
    orig_cmd_list = sg.high_level_command_stream
    sg.high_level_command_stream = orig_cmd_list
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = sg.high_level_command_stream
    cmd_list = filter_lut_cmds(cmd_list)
    orig_cmd_list = filter_lut_cmds(orig_cmd_list)
    # Check that only the needed DMA commands are left
    expected_dma_ops = [op0, op1, op2_1K, op5_1K, op7]
    for (cmd, op) in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored at different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have the same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2, op4 and op7 have the same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address
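

if __name__ == "__main__":
    # Convenience entry point so the file can be run directly; these tests are
    # normally collected and run by pytest (a sketch added for illustration,
    # not part of the original suite).
    import pytest

    pytest.main([__file__])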