Diffstat (limited to 'ethosu/vela/test/test_lut.py')
-rw-r--r-- ethosu/vela/test/test_lut.py | 63 +++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 33 insertions(+), 30 deletions(-)
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 44ee0afb..4ddc8b95 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -19,7 +19,6 @@ import random
 
 import numpy as np
 
-from ethosu.vela import insert_dma
 from ethosu.vela import lut
 from ethosu.vela import mark_tensors
 from ethosu.vela import pass_packing
@@ -27,37 +26,41 @@ from ethosu.vela.data_type import DataType
 from ethosu.vela.high_level_command_stream import DMA
 from ethosu.vela.nn_graph import Graph
 from ethosu.vela.operation import Op
+from ethosu.vela.rewrite_graph import rewrite_graph_pre_order
 from ethosu.vela.rewrite_graph import verify_graph_health
 from ethosu.vela.tensor import create_const_tensor
 from ethosu.vela.tensor import TensorPurpose
 from ethosu.vela.test import testutil
 
 
-def set_256_lut(op, key):
+def set_256_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
     lut_tensor = create_const_tensor(
         op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, np.uint8, TensorPurpose.LUT
     )
-    op.set_activation_lut(lut_tensor)
+    scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
+    op.set_activation_lut(scratch_lut_tensor)
 
 
-def set_1K_lut(op, key):
+def set_1K_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
     lut_tensor = create_const_tensor(
         op.name + "_lut", [1, 1, 1, 256], DataType.int32, values, np.uint32, TensorPurpose.LUT
     )
-    op.set_activation_lut(lut_tensor)
+    scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
+    op.set_activation_lut(scratch_lut_tensor)
 
 
-def set_2K_lut(op, key):
+def set_2K_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(512), k=512)
     lut_tensor = create_const_tensor(
         op.name + "_lut", [1, 1, 1, 512], DataType.int32, values, np.uint32, TensorPurpose.LUT
     )
-    op.set_activation_lut(lut_tensor)
+    scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
+    op.set_activation_lut(scratch_lut_tensor)
 
 
 def process(arch, op_list):
@@ -68,16 +71,16 @@ def process(arch, op_list):
     assert verify_graph_health(nng)
     nng = mark_tensors.mark_tensor_purpose(nng, arch, False)
     assert verify_graph_health(nng)
-    nng = insert_dma.insert_dma_commands(nng, arch, False)
-    assert verify_graph_health(nng)
+    rewrite_graph_pre_order(nng, sg, arch, [], [])
     pass_packing.pack_into_passes(nng, arch, False)
     assert verify_graph_health(nng)
 
     # Create a DMA instruction for every op
     cmd_list = []
     for ps in sg.passes:
-        for intermediate in ps.intermediates:
-            if intermediate.needs_dma():
-                cmd_list.append(DMA(ps, intermediate.get_dma_src_tensor(), intermediate, None))
+        for input_tens in ps.inputs:
+            if input_tens.src_tensor:
+                cmd_list.append(DMA(ps, input_tens.src_tensor, input_tens, None))
+
     sg.high_level_command_stream = cmd_list
     return sg
@@ -96,28 +99,28 @@ def test_optimize_high_level_cmd_stream_2K():
     shape = [1, 1, 1, 1]
     # u8 LUT op, should lead to DMA
     op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
-    set_256_lut(op0, "lut0")
+    set_256_lut(op0, "lut0", arch)
     # u8 LUT op, should lead to DMA
     op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
-    set_256_lut(op1, "lut1")
+    set_256_lut(op1, "lut1", arch)
     # u8 LUT op with different LUT, should lead to DMA
     op2 = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
-    set_256_lut(op2, "lut2")
+    set_256_lut(op2, "lut2", arch)
     # u8 LUT op with same LUT as in op1, should not lead to DMA
     op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
-    set_256_lut(op3, "lut1")
+    set_256_lut(op3, "lut1", arch)
     # u8 LUT op with same LUT as in op2, should not lead to DMA
     op4 = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
-    set_256_lut(op4, "lut2")
+    set_256_lut(op4, "lut2", arch)
     # 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM
     op5_2K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
-    set_2K_lut(op5_2K, "lut5")
+    set_2K_lut(op5_2K, "lut5", arch)
     # Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM
     op6_2K = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
-    set_2K_lut(op6_2K, "lut6")
+    set_2K_lut(op6_2K, "lut6", arch)
     # u8 LUT op with same LUT as in op1, should lead to DMA
     op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
-    set_256_lut(op7, "lut1")
+    set_256_lut(op7, "lut1", arch)
 
     op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7]
     sg = process(arch, op_list)
@@ -132,7 +135,7 @@ def test_optimize_high_level_cmd_stream_2K():
     orig_cmd_list = filter_lut_cmds(orig_cmd_list)
 
     for (cmd, op) in zip(cmd_list, expected_dma_ops):
-        assert cmd.in_tensor == op.activation_lut
+        assert cmd.in_tensor == op.activation_lut.src_tensor
     # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
     assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
     assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
@@ -151,28 +154,28 @@ def test_optimize_high_level_cmd_stream_1K():
     shape = [1, 1, 1, 1]
     # u8 LUT op, should lead to DMA
     op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
-    set_256_lut(op0, "lut0")
+    set_256_lut(op0, "lut0", arch)
     # u8 LUT op, should lead to DMA
     op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
-    set_256_lut(op1, "lut1")
+    set_256_lut(op1, "lut1", arch)
     # 1K LUT op with different LUT, should lead to DMA
     op2_1K = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
-    set_1K_lut(op2_1K, "lut2")
+    set_1K_lut(op2_1K, "lut2", arch)
     # u8 LUT op with same LUT as in op1, should not lead to DMA
     op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
-    set_256_lut(op3, "lut1")
+    set_256_lut(op3, "lut1", arch)
     # 1K LUT op with same LUT as in op2, should not lead to DMA
     op4_1K = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
-    set_1K_lut(op4_1K, "lut2")
+    set_1K_lut(op4_1K, "lut2", arch)
     # 1K LUT op, should lead to DMA, and will overwrite lut2
     op5_2K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
-    set_1K_lut(op5_2K, "lut5")
+    set_1K_lut(op5_2K, "lut5", arch)
     # u8 LUT op, lut0 should still be present, should not lead to DMA
     op6 = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
-    set_256_lut(op6, "lut0")
+    set_256_lut(op6, "lut0", arch)
     # 1K LUT op with same LUT as in op2, should lead to DMA
     op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
-    set_1K_lut(op7, "lut2")
+    set_1K_lut(op7, "lut2", arch)
 
     op_list = [op0, op1, op2_1K, op3, op4_1K, op5_2K, op6, op7]
     sg = process(arch, op_list)
@@ -187,7 +190,7 @@ def test_optimize_high_level_cmd_stream_1K():
     # Check that only the needed DMA commands are left
     expected_dma_ops = [op0, op1, op2_1K, op5_2K, op7]
     for (cmd, op) in zip(cmd_list, expected_dma_ops):
-        assert cmd.in_tensor == op.activation_lut
+        assert cmd.in_tensor == op.activation_lut.src_tensor
     # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
     assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
     assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
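
The following is a minimal sketch (not part of the patch) of the LUT flow the updated tests exercise: the constant LUT tensor is cloned into fast storage (SHRAM) and the clone is attached to the op, so a later high-level DMA command can pair the original constant (source) with the clone (destination). It assumes a testutil.create_arch() helper is available, as in the unchanged parts of this test module.

import random

import numpy as np

from ethosu.vela.data_type import DataType
from ethosu.vela.operation import Op
from ethosu.vela.tensor import create_const_tensor
from ethosu.vela.tensor import TensorPurpose
from ethosu.vela.test import testutil

arch = testutil.create_arch()  # assumed helper; the tests above receive arch the same way
shape = [1, 1, 1, 1]
op = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)

# Build a 256-entry u8 LUT as a constant tensor, exactly as set_256_lut does
random.seed("lut0")
values = random.choices(range(256), k=256)
lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, np.uint8, TensorPurpose.LUT)

# Clone the constant into fast storage and attach the clone to the op. The
# clone keeps a src_tensor reference back to the constant, which is why the
# tests assert cmd.in_tensor == op.activation_lut.src_tensor after optimization.
scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
op.set_activation_lut(scratch_lut_tensor)

assert op.activation_lut is scratch_lut_tensor
assert op.activation_lut.src_tensor is lut_tensor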