aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/test/extapi/test_extapi_generate_commands.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/test/extapi/test_extapi_generate_commands.py')
-rw-r--r-- ethosu/vela/test/extapi/test_extapi_generate_commands.py 137
1 files changed, 130 insertions, 7 deletions
diff --git a/ethosu/vela/test/extapi/test_extapi_generate_commands.py b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
index 441c4a4f..6284faa3 100644
--- a/ethosu/vela/test/extapi/test_extapi_generate_commands.py
+++ b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -42,6 +42,8 @@ from ethosu.vela.api import NpuShape3D
from ethosu.vela.api import NpuTileBox
from ethosu.vela.architecture_features import Accelerator
from ethosu.vela.architecture_features import create_default_arch
+from ethosu.vela.errors import ByteAlignmentError
+from ethosu.vela.errors import ByteSizeError
from ethosu.vela.errors import VelaError
from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0
from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1
@@ -49,6 +51,7 @@ from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex
from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions
from ethosu.vela.register_command_stream_generator import CmdMode
from ethosu.vela.register_command_stream_generator import generate_command_stream
+from ethosu.vela.register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from ethosu.vela.register_command_stream_util import get_address_ranges
@@ -380,10 +383,10 @@ def test_mul_with_broadcast_and_relu():
def create_avg_pool_op() -> NpuPoolingOperation:
op = NpuPoolingOperation(NpuPoolingOp.AVERAGE)
op.ifm = create_feature_map(
- NpuShape3D(height=29, width=30, depth=27), 2, 0, quant=NpuQuantization(scale_f32=0.007843138, zero_point=128)
+ NpuShape3D(height=32, width=30, depth=28), 2, 0, quant=NpuQuantization(scale_f32=0.007843138, zero_point=128)
)
op.ofm = create_feature_map(
- NpuShape3D(height=10, width=10, depth=27),
+ NpuShape3D(height=10, width=10, depth=28),
2,
0x5BD0,
quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
@@ -778,25 +781,25 @@ def test_check_mem_limits():
# Tests that no code is generated with addresses out of bounds
conv_op = create_fully_connected_op()
# bias with end address out of range
- conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)]
+ conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
# same test should pass with Ethos_U65_512
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
# weights with end address out of range
conv_op = create_fully_connected_op()
- conv_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1000)]
+ conv_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_256)
# bias with high end address, but still within range
addr = (1 << 40) - 1024
conv_op = create_fully_connected_op()
- conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1000)]
+ conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1024)]
cmds = npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
check_cmd1(cmds, cmd1.NPU_SET_SCALE_BASE, addr & ((1 << 32) - 1), (addr >> 32) & ((1 << 16) - 1))
conv_op = create_fully_connected_op()
# weights with negative address
- conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]
+ conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_32)
op = create_avg_pool_op()
@@ -811,6 +814,126 @@ def test_check_mem_limits():
npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_64)
+def test_cmd1_payload_legality():
+ # Tests that illegal cmd1 payloads raise ByteSizeError/ByteAlignmentError and that legal payloads are accepted
+
+ # Test bias and weight payload legality
+ # Illegal bias length (24) fails
+ conv_op = create_fully_connected_op()
+ conv_op.biases = [NpuAddressRange(region=0, address=111, length=24)]
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Legal bias length (32) passes
+ conv_op.biases = [NpuAddressRange(region=0, address=111, length=32)]
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+
+ # Illegal weight length (24) fails
+ conv_op = create_fully_connected_op()
+ conv_op.weights = [NpuAddressRange(region=0, address=128, length=24)]
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Legal weight length (32) passes
+ conv_op.weights = [NpuAddressRange(region=0, address=128, length=32)]
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+
+ # Unaligned weight address (120) fails
+ conv_op = create_fully_connected_op()
+ conv_op.weights = [NpuAddressRange(region=0, address=120, length=32)]
+ with pytest.raises(ByteAlignmentError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Aligned weight address already tested above (address=128 with legal length passes)
+
+ # Test DMA payload legality
+ # Illegal DMA length (120) on Ethos-U55 fails
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U55_64)
+
+ # Legal DMA length (128) on Ethos-U55 passes
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 128)
+ src = NpuAddressRange(0, 512, 128)
+ dma_op = NpuDmaOperation(src, dest)
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U55_64)
+
+ # Length not a multiple of 16, Ethos-U65, internal DMA destination (BASE_PTR_INDEX_MEM2MEM), fails
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U65_256)
+ # Length not a multiple of 16, Ethos-U65, external DMA destination (region 2), passes
+ dest = NpuAddressRange(2, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U65_256)
+
+ # Test feature map (fm) stride payload legality
+ ifm_shape = NpuShape3D(height=30, width=62, depth=46)
+ address = 512
+ op = NpuConv2DOperation()
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ dtype=NpuDataType.INT16,
+ )
+ op.ofm = create_feature_map(
+ NpuShape3D(height=30, width=31, depth=46),
+ 1,
+ 0x14E40,
+ quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
+ dtype=NpuDataType.INT16,
+ )
+ op.kernel = NpuKernel(3, 2, 2, 1)
+ op.weights = [NpuAddressRange(region=0, address=0, length=7696)]
+ op.biases = [NpuAddressRange(region=0, address=32000, length=464)]
+ op.padding = NpuPadding(top=0, left=0, right=1, bottom=1)
+ op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+ op.block_config = NpuShape3D(height=16, width=4, depth=16)
+
+ # NHWC depth stride (16) not a multiple of 32 passes
+ op.ifm.strides = NpuShape3D(depth=16, height=2, width=16)
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+ # Same depth stride fails for NHCWB16
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ layout=NpuLayout.NHCWB16,
+ dtype=NpuDataType.INT16,
+ )
+ op.ifm.strides = NpuShape3D(depth=16, height=2, width=16)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+ # Test feature map (fm) address payload alignment
+
+ # Unaligned tile address (last entry is 24) fails
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ layout=NpuLayout.NHCWB16,
+ dtype=NpuDataType.INT16,
+ )
+ op.ifm.tiles = NpuTileBox(
+ width_0=ifm_shape.width, height_0=ifm_shape.height, height_1=ifm_shape.height, addresses=[address, 16, 16, 24]
+ )
+ with pytest.raises(ByteAlignmentError):
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+ # Aligned tile addresses (last entry is 16) pass
+ op.ifm.tiles = NpuTileBox(
+ width_0=ifm_shape.width, height_0=ifm_shape.height, height_1=ifm_shape.height, addresses=[address, 16, 16, 16]
+ )
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+
def test_check_sram_limit_spilling():
# Tests that no code is generated with addresses outside available sram spilling range
arch = create_default_arch(Accelerator.Ethos_U65_512)