aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Isaksson <william.isaksson@arm.com>2023-06-19 15:31:46 +0000
committerFredrik Svedberg <fredrik.svedberg@arm.com>2023-07-31 14:09:38 +0000
commita4f8411f870defaba52175717b40afdd41ae0d40 (patch)
treeff4e9b53395405f7091d2aef856f2bc2863befef
parent2f9b6874a227d8fa056c2e2fd01e8c80824ee0bc (diff)
downloadethos-u-vela-a4f8411f870defaba52175717b40afdd41ae0d40.tar.gz
MLBEDSW-7718: Add cmd1 payload legality checks
- checks that cmd1 payloads are legal in register_command_stream_generator, - adds unit tests Change-Id: I2bc23147f60fe090c71703f08d9cbaa279fac86e Signed-off-by: William Isaksson <william.isaksson@arm.com>
-rw-r--r--ethosu/vela/errors.py16
-rw-r--r--ethosu/vela/register_command_stream_generator.py57
-rw-r--r--ethosu/vela/register_command_stream_util.py55
-rw-r--r--ethosu/vela/test/extapi/test_extapi_generate_commands.py137
4 files changed, 244 insertions, 21 deletions
diff --git a/ethosu/vela/errors.py b/ethosu/vela/errors.py
index bf3bb4d..22fc17c 100644
--- a/ethosu/vela/errors.py
+++ b/ethosu/vela/errors.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -75,3 +75,17 @@ class AllocationError(VelaError):
def __init__(self, msg):
super().__init__(f"Allocation failed: {msg}")
+
+
+class ByteAlignmentError(VelaError):
+ """Raised when value is unaligned"""
+
+ def __init__(self, msg):
+ super().__init__(f"Unaligned Value: {msg}")
+
+
+class ByteSizeError(VelaError):
+ """Raised when size has illegal value"""
+
+ def __init__(self, msg):
+ super().__init__(f"Illegal Size: {msg}")
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 6001a3b..71fec3b 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -60,6 +60,8 @@ from .architecture_features import Accelerator
from .architecture_features import ArchitectureFeatures
from .architecture_features import create_default_arch
from .architecture_features import SHRAMElements
+from .errors import ByteAlignmentError
+from .errors import ByteSizeError
from .errors import VelaError
from .ethos_u55_regs.ethos_u55_regs import acc_format
from .ethos_u55_regs.ethos_u55_regs import activation
@@ -76,6 +78,11 @@ from .operation import NpuBlockType
from .range_set import MemoryAccessSet
from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from .register_command_stream_util import calc_blockdep
+from .register_command_stream_util import check_addresses
+from .register_command_stream_util import check_alignment
+from .register_command_stream_util import check_dma_op
+from .register_command_stream_util import check_size
+from .register_command_stream_util import check_strides
from .register_command_stream_util import get_dma_memory_accesses
from .register_command_stream_util import get_op_memory_accesses
from .register_command_stream_util import get_strides
@@ -335,11 +342,16 @@ def generate_activation(emit: CommandStreamEmitter, activation: Optional[NpuActi
emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MAX, quantized_max)
-def generate_addresses(emit: CommandStreamEmitter, ptr_cmds: List[cmd1], addresses: List[int], layout: NpuLayout):
+def generate_addresses(
+ emit: CommandStreamEmitter,
+ ptr_cmds: List[cmd1],
+ addresses: List[int],
+ layout: NpuLayout,
+ element_size,
+ arch: ArchitectureFeatures,
+):
"""Generates xFM_BASE registers"""
- if layout == NpuLayout.NHCWB16:
- # Check that all BasePointer addresses are aligned to 16 bytes
- assert all((int(addr) % 16) == 0 for addr in addresses)
+ check_addresses(addresses, layout, element_size, arch)
for i in range(4):
emit.cmd1_with_address(ptr_cmds[i], addresses[i])
@@ -356,6 +368,8 @@ def generate_strides(
):
"""Generates STRIDE_C/Y/X registers"""
strides = get_strides(fm)
+ check_strides(fm, strides)
+
emit.cmd1_with_address(stride_c_cmd, strides.depth) # stride between 16-byte channel blocks (C)
emit.cmd1_with_address(stride_y_cmd, strides.height) # stride between vertical values (H)
emit.cmd1_with_address(stride_x_cmd, strides.width) # stride between horisontal values (W)
@@ -420,7 +434,7 @@ def generate_ifm2_broadcast(emit: CommandStreamEmitter, npu_op: NpuElementWiseOp
emit.cmd0_with_param(cmd0.NPU_SET_IFM2_BROADCAST, ifm2_broadcast)
-def generate_ifm(emit: CommandStreamEmitter, ifm: NpuFeatureMap):
+def generate_ifm(emit: CommandStreamEmitter, ifm: NpuFeatureMap, arch: ArchitectureFeatures):
"""Generates general IFM registers"""
emit.cmd0_with_param(cmd0.NPU_SET_IFM_REGION, ifm.region)
generate_addresses(
@@ -428,6 +442,8 @@ def generate_ifm(emit: CommandStreamEmitter, ifm: NpuFeatureMap):
[cmd1.NPU_SET_IFM_BASE0, cmd1.NPU_SET_IFM_BASE1, cmd1.NPU_SET_IFM_BASE2, cmd1.NPU_SET_IFM_BASE3],
ifm.tiles.addresses,
ifm.layout,
+ ifm.data_type.size_in_bytes(),
+ arch,
)
generate_tiles(
emit, [cmd0.NPU_SET_IFM_HEIGHT0_M1, cmd0.NPU_SET_IFM_HEIGHT1_M1, cmd0.NPU_SET_IFM_WIDTH0_M1], ifm.tiles
@@ -437,7 +453,7 @@ def generate_ifm(emit: CommandStreamEmitter, ifm: NpuFeatureMap):
emit.cmd0_with_param(cmd0.NPU_SET_IFM_ZERO_POINT, get_zero_point(ifm))
-def generate_ifm2(emit: CommandStreamEmitter, ifm2: NpuFeatureMap, has_scalar: bool):
+def generate_ifm2(emit: CommandStreamEmitter, ifm2: NpuFeatureMap, has_scalar: bool, arch: ArchitectureFeatures):
"""Generates general IFM2 registers"""
if not has_scalar:
emit.cmd0_with_param(cmd0.NPU_SET_IFM2_REGION, ifm2.region)
@@ -446,6 +462,8 @@ def generate_ifm2(emit: CommandStreamEmitter, ifm2: NpuFeatureMap, has_scalar: b
[cmd1.NPU_SET_IFM2_BASE0, cmd1.NPU_SET_IFM2_BASE1, cmd1.NPU_SET_IFM2_BASE2, cmd1.NPU_SET_IFM2_BASE3],
ifm2.tiles.addresses,
ifm2.layout,
+ ifm2.data_type.size_in_bytes(),
+ arch,
)
generate_tiles(
emit, [cmd0.NPU_SET_IFM2_HEIGHT0_M1, cmd0.NPU_SET_IFM2_HEIGHT1_M1, cmd0.NPU_SET_IFM2_WIDTH0_M1], ifm2.tiles
@@ -454,7 +472,7 @@ def generate_ifm2(emit: CommandStreamEmitter, ifm2: NpuFeatureMap, has_scalar: b
emit.cmd0_with_param(cmd0.NPU_SET_IFM2_ZERO_POINT, get_zero_point(ifm2))
-def generate_ofm(emit: CommandStreamEmitter, ofm: NpuFeatureMap):
+def generate_ofm(emit: CommandStreamEmitter, ofm: NpuFeatureMap, arch: ArchitectureFeatures):
"""Generates general OFM registers"""
emit.cmd0_with_param(cmd0.NPU_SET_OFM_REGION, ofm.region)
generate_addresses(
@@ -462,6 +480,8 @@ def generate_ofm(emit: CommandStreamEmitter, ofm: NpuFeatureMap):
[cmd1.NPU_SET_OFM_BASE0, cmd1.NPU_SET_OFM_BASE1, cmd1.NPU_SET_OFM_BASE2, cmd1.NPU_SET_OFM_BASE3],
ofm.tiles.addresses,
ofm.layout,
+ ofm.data_type.size_in_bytes(),
+ arch,
)
generate_tiles(
emit, [cmd0.NPU_SET_OFM_HEIGHT0_M1, cmd0.NPU_SET_OFM_HEIGHT1_M1, cmd0.NPU_SET_OFM_WIDTH0_M1], ofm.tiles
@@ -505,9 +525,12 @@ def generate_weights(emit: CommandStreamEmitter, weights: List[NpuAddressRange],
]
):
if core < len(weights):
+ check_alignment(weights[core].address, 16)
+ check_size(weights[core].length, 16)
emit.cmd1_with_address(addr, weights[core].address)
emit.cmd1_with_offset(length, weights[core].length)
elif core < arch.ncores:
+ check_alignment(weights[0].address, 16)
emit.cmd1_with_address(addr, weights[0].address)
emit.cmd1_with_offset(length, 0)
@@ -523,6 +546,7 @@ def generate_biases(emit: CommandStreamEmitter, biases: List[NpuAddressRange], a
):
if core < len(biases):
emit.cmd1_with_address(addr, biases[core].address)
+ check_size(biases[core].length, 16)
emit.cmd1_with_offset(length, biases[core].length)
elif core < arch.ncores:
emit.cmd1_with_address(addr, biases[0].address)
@@ -631,12 +655,12 @@ def generate_common(
):
"""Generate registers that are common to most operations"""
assert npu_op.ifm is not None and npu_op.ofm is not None
- generate_ifm(emit, npu_op.ifm)
+ generate_ifm(emit, npu_op.ifm, arch)
generate_ifm_precision(emit, npu_op.ifm, op_to_scale, cmd0.NPU_SET_IFM_PRECISION)
emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, resampling_mode_map[npu_op.ifm_upscale])
if npu_op.padding is not None:
generate_padding(emit, npu_op.padding)
- generate_ofm(emit, npu_op.ofm)
+ generate_ofm(emit, npu_op.ofm, arch)
generate_ofm_precision(emit, npu_op, use_global_scale)
if npu_op.op_type != NpuOperationType.ElementWise:
assert npu_op.kernel is not None
@@ -974,7 +998,7 @@ def generate_elementwise_op(emit: CommandStreamEmitter, npu_op: NpuElementWiseOp
# Binary operation; generate IFM2 registers
assert npu_op.ifm2 is not None
has_scalar = npu_op.ifm2_scalar is not None
- generate_ifm2(emit, npu_op.ifm2, has_scalar)
+ generate_ifm2(emit, npu_op.ifm2, has_scalar, arch)
generate_ifm_precision(emit, npu_op.ifm2, 0, cmd0.NPU_SET_IFM2_PRECISION)
generate_ifm2_broadcast(emit, npu_op)
if has_scalar:
@@ -983,8 +1007,10 @@ def generate_elementwise_op(emit: CommandStreamEmitter, npu_op: NpuElementWiseOp
emit.cmd0_with_param(cmd0.NPU_SET_IFM2_SCALAR, quantized_scalar)
-def generate_dma_op(emit: CommandStreamEmitter, dma_op: NpuDmaOperation):
+def generate_dma_op(emit: CommandStreamEmitter, dma_op: NpuDmaOperation, arch: ArchitectureFeatures):
"""Generates register commands for DMA operations"""
+ check_dma_op(dma_op, arch)
+
emit.cmd0_with_param(cmd0.NPU_SET_DMA0_SRC_REGION, dma_op.src.region)
emit.cmd1_with_address(cmd1.NPU_SET_DMA0_SRC, dma_op.src.address)
emit.cmd0_with_param(cmd0.NPU_SET_DMA0_DST_REGION, dma_op.dest.region)
@@ -1007,7 +1033,7 @@ def generate_registers_for_op(emit: CommandStreamEmitter, npu_op: NpuOperation,
elif isinstance(npu_op, NpuElementWiseOperation):
generate_elementwise_op(emit, npu_op, arch)
elif isinstance(npu_op, NpuDmaOperation):
- generate_dma_op(emit, npu_op)
+ generate_dma_op(emit, npu_op, arch)
else:
assert 0, "Unsupported operation"
@@ -1048,8 +1074,13 @@ def generate_command_stream(
check_mem_limits(memory_accesses[npu_op], mem_limits)
cmd_waits = get_wait_dependency(arch, npu_op, memory_accesses, outstanding_dma_ops, outstanding_npu_ops)
generate_registers_for_op(emit, npu_op, arch)
+ except ByteAlignmentError as e:
+ # Enables testing for ByteAlignmentErrors specifically
+ raise ByteAlignmentError(f"{e.error_msg}, in operation {op_index}:{npu_op.op_type.name}") from None
+ except ByteSizeError as e:
+ # Enables testing for ByteSizeErrors specifically
+ raise ByteSizeError(f"{e.error_msg}, in operation {op_index}:{npu_op.op_type.name}") from None
except VelaError as e:
- # Add operation info and rethrow
raise VelaError(f"{e.error_msg}, in operation {op_index}:{npu_op.op_type.name}") from None
if not isinstance(npu_op, NpuDmaOperation) and isinstance(npu_op, NpuBlockOperation):
# Generate BLOCKDEP
diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py
index b131f64..c7050a3 100644
--- a/ethosu/vela/register_command_stream_util.py
+++ b/ethosu/vela/register_command_stream_util.py
@@ -37,12 +37,16 @@ from .api import NpuShape3D
from .architecture_features import ArchitectureFeatures
from .architecture_features import Block
from .architecture_features import Rect
+from .errors import ByteAlignmentError
+from .errors import ByteSizeError
from .operation import Kernel
from .operation import PointXYZ
+from .tensor import TensorFormat
from ethosu.vela.range_set import AccessDirection
from ethosu.vela.range_set import MemoryAccessSet
from ethosu.vela.range_set import MemoryRangeSet
+
# base address slot for memory to memory transfer
BASE_PTR_INDEX_MEM2MEM = int((1 << 8) | (3 << 0))
@@ -50,6 +54,18 @@ BASE_PTR_INDEX_MEM2MEM = int((1 << 8) | (3 << 0))
UNARY_ELEMWISE_OPS = (NpuElementWiseOp.ABS, NpuElementWiseOp.LRELU, NpuElementWiseOp.CLZ)
+def check_alignment(payload, required_alignment):
+ # assuming payload is defined in bytes
+ if payload % required_alignment != 0:
+ raise ByteAlignmentError(f"Cmd1 payload of size: {payload} Bytes is not {required_alignment}-byte aligned")
+
+
+def check_size(payload, required_multiple):
+ # assuming payload is defined in bytes
+ if payload % required_multiple != 0:
+ raise ByteSizeError(f"Cmd1 payload of size: {payload} Bytes is not a multiple of {required_multiple}")
+
+
def to_npu_kernel(kernel: Kernel) -> NpuKernel:
"""Converts the given internally used kernel object to NpuKernel (of public API)"""
return NpuKernel(
@@ -241,6 +257,29 @@ def get_address_ranges(fm: NpuFeatureMap) -> List[Optional[NpuAddressRange]]:
return [t0, t1, t2, t3]
+def check_strides(fm: NpuFeatureMap, strides: NpuShape3D):
+
+ element_size_in_bytes = fm.data_type.size_in_bytes()
+
+ if fm.layout == NpuLayout.NHCWB16:
+ strides_to_check = [strides.depth, strides.height]
+ required_multiple = 16 * element_size_in_bytes
+ else:
+ strides_to_check = [strides.height, strides.width]
+ required_multiple = element_size_in_bytes
+ for stride in strides_to_check:
+ check_size(stride, required_multiple)
+
+
+def check_addresses(addresses: List[int], layout: NpuLayout, element_size, arch: ArchitectureFeatures):
+ if layout == NpuLayout.NHCWB16:
+ required_alignment = arch.storage_rounding_quantums[TensorFormat.NHCWB16][-1]
+ else:
+ required_alignment = element_size
+ for addr in addresses:
+ check_alignment(addr, required_alignment)
+
+
# -------------------------------------------------------------------
# DMA_WAIT/KERNEL_WAIT
# -------------------------------------------------------------------
@@ -336,6 +375,22 @@ def get_wait_dependency(
return cmd_waits
+def check_dma_op(dma_op: NpuDmaOperation, arch: ArchitectureFeatures):
+
+ # For Ethos-U65 only internal addresses have to be aligned, and if the internal address is the destination
+ # then the length has to be aligned also.
+ if arch.is_ethos_u65_system:
+ if dma_op.src.region == BASE_PTR_INDEX_MEM2MEM:
+ check_alignment(dma_op.src.address, 16)
+ if dma_op.dest.region == BASE_PTR_INDEX_MEM2MEM:
+ check_alignment(dma_op.dest.address, 16)
+ check_size(dma_op.src.length, 16)
+ else:
+ check_alignment(dma_op.src.address, 16)
+ check_alignment(dma_op.dest.address, 16)
+ check_size(dma_op.src.length, 16)
+
+
# -------------------------------------------------------------------
# BLOCKDEP
# -------------------------------------------------------------------
diff --git a/ethosu/vela/test/extapi/test_extapi_generate_commands.py b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
index 441c4a4..6284faa 100644
--- a/ethosu/vela/test/extapi/test_extapi_generate_commands.py
+++ b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -42,6 +42,8 @@ from ethosu.vela.api import NpuShape3D
from ethosu.vela.api import NpuTileBox
from ethosu.vela.architecture_features import Accelerator
from ethosu.vela.architecture_features import create_default_arch
+from ethosu.vela.errors import ByteAlignmentError
+from ethosu.vela.errors import ByteSizeError
from ethosu.vela.errors import VelaError
from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0
from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1
@@ -49,6 +51,7 @@ from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex
from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions
from ethosu.vela.register_command_stream_generator import CmdMode
from ethosu.vela.register_command_stream_generator import generate_command_stream
+from ethosu.vela.register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from ethosu.vela.register_command_stream_util import get_address_ranges
@@ -380,10 +383,10 @@ def test_mul_with_broadcast_and_relu():
def create_avg_pool_op() -> NpuPoolingOperation:
op = NpuPoolingOperation(NpuPoolingOp.AVERAGE)
op.ifm = create_feature_map(
- NpuShape3D(height=29, width=30, depth=27), 2, 0, quant=NpuQuantization(scale_f32=0.007843138, zero_point=128)
+ NpuShape3D(height=32, width=30, depth=28), 2, 0, quant=NpuQuantization(scale_f32=0.007843138, zero_point=128)
)
op.ofm = create_feature_map(
- NpuShape3D(height=10, width=10, depth=27),
+ NpuShape3D(height=10, width=10, depth=28),
2,
0x5BD0,
quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
@@ -778,25 +781,25 @@ def test_check_mem_limits():
# Tests that no code is generated with addresses out of bounds
conv_op = create_fully_connected_op()
# bias with end address out of range
- conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)]
+ conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
# same test should pass with Ethos_U65_512
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
# weights with end address out of range
conv_op = create_fully_connected_op()
- conv_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1000)]
+ conv_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_256)
# bias with high end address, but still within range
addr = (1 << 40) - 1024
conv_op = create_fully_connected_op()
- conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1000)]
+ conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1024)]
cmds = npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
check_cmd1(cmds, cmd1.NPU_SET_SCALE_BASE, addr & ((1 << 32) - 1), (addr >> 32) & ((1 << 16) - 1))
conv_op = create_fully_connected_op()
# weights with negative address
- conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]
+ conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1024)]
with pytest.raises(VelaError):
npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_32)
op = create_avg_pool_op()
@@ -811,6 +814,126 @@ def test_check_mem_limits():
npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_64)
+def test_cmd1_payload_legality():
+ # Tests payload legality
+
+ # Test Bias and weight payload legality
+ # Illegal bias length fails
+ conv_op = create_fully_connected_op()
+ conv_op.biases = [NpuAddressRange(region=0, address=111, length=24)]
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Legal bias length passes
+ conv_op.biases = [NpuAddressRange(region=0, address=111, length=32)]
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+
+ # Illegal weight length fails
+ conv_op = create_fully_connected_op()
+ conv_op.weights = [NpuAddressRange(region=0, address=128, length=24)]
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Legal weight length passes
+ conv_op.weights = [NpuAddressRange(region=0, address=128, length=32)]
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+
+ # Unaligned weight address fails
+ conv_op = create_fully_connected_op()
+ conv_op.weights = [NpuAddressRange(region=0, address=120, length=32)]
+ with pytest.raises(ByteAlignmentError):
+ npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64)
+ # Aligned weight length already tested
+
+ # Test DMA payload legality
+ # Illegal dma length Ethos-U55 fails
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U55_64)
+
+ # Legal dma length U55 passes
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 128)
+ src = NpuAddressRange(0, 512, 128)
+ dma_op = NpuDmaOperation(src, dest)
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U55_64)
+
+ # Length not a multiple of 16, Ethos-U65, internal dma destination, fails
+ dest = NpuAddressRange(BASE_PTR_INDEX_MEM2MEM, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U65_256)
+ # Length not a multiple of 16, Ethos-U65, external dma destination passes
+ dest = NpuAddressRange(2, 256, 120)
+ src = NpuAddressRange(0, 512, 120)
+ dma_op = NpuDmaOperation(src, dest)
+ npu_generate_register_command_stream([dma_op], NpuAccelerator.Ethos_U65_256)
+
+ # Test fm stride payload legality
+ ifm_shape = NpuShape3D(height=30, width=62, depth=46)
+ address = 512
+ op = NpuConv2DOperation()
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ dtype=NpuDataType.INT16,
+ )
+ op.ofm = create_feature_map(
+ NpuShape3D(height=30, width=31, depth=46),
+ 1,
+ 0x14E40,
+ quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
+ dtype=NpuDataType.INT16,
+ )
+ op.kernel = NpuKernel(3, 2, 2, 1)
+ op.weights = [NpuAddressRange(region=0, address=0, length=7696)]
+ op.biases = [NpuAddressRange(region=0, address=32000, length=464)]
+ op.padding = NpuPadding(top=0, left=0, right=1, bottom=1)
+ op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+ op.block_config = NpuShape3D(height=16, width=4, depth=16)
+
+ # NHWC depth stride not a multiple of 32 passes
+ op.ifm.strides = NpuShape3D(depth=16, height=2, width=16)
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+ # Same depth stride fails for NHCWB16
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ layout=NpuLayout.NHCWB16,
+ dtype=NpuDataType.INT16,
+ )
+ op.ifm.strides = NpuShape3D(depth=16, height=2, width=16)
+ with pytest.raises(ByteSizeError):
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+ # Test fm address payload alignment
+
+ # Unaligned address fails
+ op.ifm = create_feature_map(
+ ifm_shape,
+ 1,
+ address,
+ quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
+ layout=NpuLayout.NHCWB16,
+ dtype=NpuDataType.INT16,
+ )
+ op.ifm.tiles = NpuTileBox(
+ width_0=ifm_shape.width, height_0=ifm_shape.height, height_1=ifm_shape.height, addresses=[address, 16, 16, 24]
+ )
+ with pytest.raises(ByteAlignmentError):
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+ # Aligned address passes
+ op.ifm.tiles = NpuTileBox(
+ width_0=ifm_shape.width, height_0=ifm_shape.height, height_1=ifm_shape.height, addresses=[address, 16, 16, 16]
+ )
+ npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256)
+
+
def test_check_sram_limit_spilling():
# Tests that no code is generated with addresses outside available sram spilling range
arch = create_default_arch(Accelerator.Ethos_U65_512)