diff options
| | | |
|---|---|---|
| author | Louis Verhaard <louis.verhaard@arm.com> | 2021-03-17 14:26:34 +0100 |
| committer | Louis Verhaard <louis.verhaard@arm.com> | 2021-03-22 10:58:02 +0100 |
| commit | 024c355e51666868616b7ec560c7f87e03fcd398 (patch) | |
| tree | 74f7dec6cf8fbd3a146e663e2624a19beb1895c8 /ethosu | |
| parent | 1878dab5f2fb860ae98e5e1dafcdb5cec7d33349 (diff) | |
| download | ethos-u-vela-024c355e51666868616b7ec560c7f87e03fcd398.tar.gz | |
MLBEDSW-3502: Add address checks
Added checks during command stream generation to make sure
that address boundaries are respected.
Change-Id: I4dbc693b42d54e35c8fcc785e8be88059e409eec
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/architecture_features.py | 9 |
-rw-r--r-- | ethosu/vela/errors.py | 1 |
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 29 |
-rw-r--r-- | ethosu/vela/register_command_stream_generator.py | 40 |
-rw-r--r-- | ethosu/vela/test/extapi/test_extapi_generate_commands.py | 66 |
5 files changed, 132 insertions, 13 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py index b9c34095..168d0e67 100644 --- a/ethosu/vela/architecture_features.py +++ b/ethosu/vela/architecture_features.py @@ -246,6 +246,8 @@ class ArchitectureFeatures: self.memory_bandwidths_per_cycle = self.axi_port_width * self.memory_clock_scales / 8 self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock + # Max value in address offsets + self.max_address_offset = 1 << 48 if self.is_ethos_u65_system else 1 << 32 # Get output/activation performance numbers self._generate_output_perf_tables(self.accelerator_config) @@ -456,6 +458,13 @@ class ArchitectureFeatures: self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram and self.cache_mem_area != self.arena_mem_area ) + def mem_type_size(self, mem_type: MemType) -> int: + """Returns size in bytes available for the given memory type""" + if mem_type == MemType.Scratch_fast and self.is_spilling_enabled(): + return self.sram_size + # Size is unknown, return max possible address offset + return self.max_address_offset + def _mem_port_mapping(self, mem_port): mem_port_mapping = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port} return mem_port_mapping[mem_port] diff --git a/ethosu/vela/errors.py b/ethosu/vela/errors.py index 04468c90..918ca0a1 100644 --- a/ethosu/vela/errors.py +++ b/ethosu/vela/errors.py @@ -22,6 +22,7 @@ class VelaError(Exception): def __init__(self, data): self.data = f"Error! 
{data}" + self.error_msg = data def __str__(self): return repr(self.data) diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 56c5e747..c56eb04d 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -17,6 +17,7 @@ # Description: # Conversion from high level command to NpuOperation from enum import IntEnum +from typing import Dict from typing import List from typing import Optional @@ -157,7 +158,7 @@ def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding: return NpuPadding(top=top, left=left, bottom=bottom, right=right) -def get_region(tens: Tensor, arch: ArchitectureFeatures) -> int: +def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int: base_ptr_idx_map = { MemType.Permanent_NPU: BasePointerIndex.WeightTensor, MemType.Permanent_CPU: BasePointerIndex.WeightTensor, @@ -169,7 +170,16 @@ def get_region(tens: Tensor, arch: ArchitectureFeatures) -> int: else: base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchTensor - return base_ptr_idx_map[tens.mem_type].value + return base_ptr_idx_map[mem_type].value + + +def get_mem_limits_for_regions(arch: ArchitectureFeatures) -> Dict[int, int]: + """Returns map region -> max size of the region in bytes""" + mem_limits = dict() + for mem_type in MemType.all(): + mem_limits[get_region(mem_type, arch)] = arch.mem_type_size(mem_type) + mem_limits[BASE_PTR_INDEX_MEM2MEM] = arch.shram_size_bytes + return mem_limits def get_upscale(op: Operation) -> NpuResamplingMode: @@ -238,7 +248,7 @@ def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]: def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_shape4D: Shape4D) -> NpuFeatureMap: """Creates feature map with common fields populated""" fm = NpuFeatureMap() - fm.region = get_region(tens, arch) + fm.region = get_region(tens.mem_type, arch) fm.data_type = dtype_map[tens.dtype] if tens.format 
== TensorFormat.NHWC: fm.layout = NpuLayout.NHWC @@ -270,7 +280,7 @@ def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFea # Extract weight substream offsets and calculate their lengths assert len(weight_substream_offsets) > 1 and (weight_substream_offsets[0] == 0) weight_addr = weight_tensor.address_for_coordinate(weight_box.start_coord) - region = get_region(weight_tensor, arch) + region = get_region(weight_tensor.mem_type, arch) for core in range(substreams): address = weight_addr + weight_substream_offsets[core] length = weight_substream_offsets[core + 1] - weight_substream_offsets[core] @@ -292,7 +302,7 @@ def create_biases( assert len(scale_substream_offsets) > 1 and (scale_substream_offsets[0] == 0) scale_addr = scale_tensor.address_for_coordinate(weight_box.start_coord[-1:]) - region = get_region(scale_tensor, arch) + region = get_region(scale_tensor.mem_type, arch) for core in range(substreams): address = scale_addr + scale_substream_offsets[core] length = scale_substream_offsets[core + 1] - scale_substream_offsets[core] @@ -447,11 +457,11 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation: """Converts the command to NpuDmaOperation""" - src_region = get_region(cmd.in_tensor, arch) + src_region = get_region(cmd.in_tensor.mem_type, arch) if cmd.out_tensor.purpose == TensorPurpose.LUT: dest_region = BASE_PTR_INDEX_MEM2MEM else: - dest_region = get_region(cmd.out_tensor, arch) + dest_region = get_region(cmd.out_tensor.mem_type, arch) start_coord = cmd.box.start_coord src_addr = cmd.in_tensor.address_for_coordinate(start_coord) @@ -502,6 +512,7 @@ def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False): npu_op = convert_command_to_npu_op(cmd, arch) npu_op_list.append(npu_op) npu_op_to_cmd[npu_op] = cmd + mem_limits = get_mem_limits_for_regions(arch) # Generate register commands if 
len(sg.high_level_command_stream) > 0: stream_id = DebugDatabase.add_stream(sg) @@ -513,4 +524,6 @@ def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False): cmd = npu_op_to_cmd[npu_op] DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op) - sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd) + sg.register_command_stream = generate_command_stream( + npu_op_list, arch, verbose, mem_limits, add_to_debug_db, npu_op_to_cmd + ) diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index f9253691..a4466c92 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -72,6 +72,7 @@ from .numeric_util import round_away_zero from .numeric_util import round_up_to_int from .operation import NpuBlockType from .range_set import MemoryAccessSet +from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM from .register_command_stream_util import calc_blockdep from .register_command_stream_util import get_dma_memory_accesses from .register_command_stream_util import get_op_memory_accesses @@ -84,6 +85,7 @@ from .register_command_stream_util import Watermark from .shared_buffer_allocation import find_suitable_block_configs from .shared_buffer_allocation import shared_buffer_allocation_for_npu_op from .shared_buffer_allocation import SharedBufferAllocation +from ethosu.vela.errors import VelaError class RegisterMachine: @@ -265,6 +267,21 @@ rounding_mode_map = { } +def check_mem_limits(memory_accesses: MemoryAccessSet, mem_limits: Dict[int, int]): + """Checks that an operation's memory accesses respect the boundaries imposed by mem_limits""" + for mem_access in memory_accesses.accesses: + for region, range_set in mem_access.regions.items(): + if region not in mem_limits: + raise VelaError(f"Invalid region: {region}") + max = mem_limits[region] + for start, end in 
range_set.ranges: + for offset in (start, end): + if offset < 0: + raise VelaError(f"Negative address offset: {offset}, region: {region}") + if offset > max: + raise VelaError(f"Address offset out of range: {offset}, region: {region}, max: {max}") + + def quantise(value: float, quant: Optional[NpuQuantization]) -> int: """Quantizes the given value""" scale = 1 if quant is None or quant.scale_f32 is None else quant.scale_f32 @@ -904,7 +921,12 @@ def generate_registers_for_op(emit: CommandStreamEmitter, npu_op: NpuOperation, def generate_command_stream( - npu_op_list: List[NpuOperation], arch: ArchitectureFeatures, verbose: bool, add_to_debug_db=None, npu_op_to_cmd=None + npu_op_list: List[NpuOperation], + arch: ArchitectureFeatures, + verbose: bool, + mem_limits: Dict[int, int], + add_to_debug_db=None, + npu_op_to_cmd=None, ) -> List[int]: """ Generates register commands for the given list of NPU operations. @@ -922,14 +944,20 @@ def generate_command_stream( memory_accesses[npu_op] = get_op_memory_accesses(npu_op, arch) else: assert 0, "Invalid operation type" + if arch.is_ethos_u65_system: emit.cmd0_with_param(cmd0.NPU_SET_PARALLEL_MODE, arch.ncores - 1) dep_watermark = Watermark(0, 0) prev_op = None # Generate register commands for all operations for op_index, npu_op in enumerate(npu_op_list): - dep_watermark, cmd_waits = get_wait_dependency(arch, npu_op_list, memory_accesses, op_index, dep_watermark) - generate_registers_for_op(emit, npu_op, arch) + try: + check_mem_limits(memory_accesses[npu_op], mem_limits) + dep_watermark, cmd_waits = get_wait_dependency(arch, npu_op_list, memory_accesses, op_index, dep_watermark) + generate_registers_for_op(emit, npu_op, arch) + except VelaError as e: + # Add operation info and rethrow + raise VelaError(f"{e.error_msg}, in operation {op_index}:{npu_op.op_type.name}") from None if not isinstance(npu_op, NpuDmaOperation) and isinstance(npu_op, NpuBlockOperation): # Generate BLOCKDEP blockdep = calc_blockdep(arch, prev_op, 
npu_op) @@ -987,4 +1015,8 @@ def generate_register_command_stream(npu_op_list: List[NpuOperation], npu_accele """ accelerator = Accelerator.from_npu_accelerator(npu_accelerator) arch = create_default_arch(accelerator) - return generate_command_stream(npu_op_list, arch, verbose=False) + mem_limits = dict() + for region in range(0, 8): + mem_limits[region] = arch.max_address_offset + mem_limits[BASE_PTR_INDEX_MEM2MEM] = arch.shram_size_bytes + return generate_command_stream(npu_op_list, arch, verbose=False, mem_limits=mem_limits) diff --git a/ethosu/vela/test/extapi/test_extapi_generate_commands.py b/ethosu/vela/test/extapi/test_extapi_generate_commands.py index b605dfc5..db0485c5 100644 --- a/ethosu/vela/test/extapi/test_extapi_generate_commands.py +++ b/ethosu/vela/test/extapi/test_extapi_generate_commands.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 # @@ -16,6 +16,8 @@ # # Description: # Contains unit tests for npu_generate_register_command_stream API for an external consumer +import pytest + from ethosu.vela.api import npu_find_block_configs from ethosu.vela.api import npu_generate_register_command_stream from ethosu.vela.api import NpuAccelerator @@ -38,9 +40,15 @@ from ethosu.vela.api import NpuPoolingOperation from ethosu.vela.api import NpuQuantization from ethosu.vela.api import NpuShape3D from ethosu.vela.api import NpuTileBox +from ethosu.vela.architecture_features import Accelerator +from ethosu.vela.architecture_features import create_default_arch +from ethosu.vela.errors import VelaError from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0 from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1 +from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex +from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions from 
ethosu.vela.register_command_stream_generator import CmdMode +from ethosu.vela.register_command_stream_generator import generate_command_stream from ethosu.vela.register_command_stream_util import get_address_ranges @@ -355,3 +363,59 @@ def test_dma_op(): # A DMA WAIT should have been inserted check_cmd0(cmds, cmd0.NPU_OP_DMA_WAIT, 0) check_cmd0(cmds, cmd0.NPU_OP_POOL, 1) + + +def test_check_mem_limits(): + # Tests that no code is generated with addresses out of bounds + conv_op = create_fully_connected_op() + # bias with end address out of range + conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)] + with pytest.raises(VelaError): + npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_64) + # same test should pass with Ethos_U65_512 + npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512) + # weights with end address out of range + conv_op = create_fully_connected_op() + conv_op.weights = [NpuAddressRange(region=0, address=(1 << 48) - 960, length=1000)] + with pytest.raises(VelaError): + npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_256) + # bias with high end address, but still within range + conv_op = create_fully_connected_op() + conv_op.biases = [NpuAddressRange(region=0, address=(1 << 48) - 1024, length=1000)] + npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512) + conv_op = create_fully_connected_op() + # weights with negative address + conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)] + with pytest.raises(VelaError): + npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U55_32) + op = create_avg_pool_op() + # Tile 4's end address out of range + op.ifm.tiles = NpuTileBox(width_0=1, height_0=1, height_1=1, addresses=[0, 800, 4000, (1 << 32) - 16]) + with pytest.raises(VelaError): + npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_256) + op = create_avg_pool_op() + # IFM region 
out of range + op.ifm.region = 8 + with pytest.raises(VelaError): + npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_64) + + +def test_check_sram_limit_spilling(): + # Tests that no code is generated with addresses outside available sram spilling range + arch = create_default_arch(Accelerator.Ethos_U65_512) + assert arch.is_spilling_enabled() + op = create_avg_pool_op() + op.ifm.region = 0 + # OFM in scratch fast memory + op.ofm.region = int(BasePointerIndex.ScratchFastTensor) + w, h = op.ofm.shape.width, op.ofm.shape.height + op.ofm.tiles = NpuTileBox(width_0=w, height_0=h, height_1=h, addresses=[32 * 1024, 0, 0, 0]) + # 384K for spilling should fit + arch.sram_size = 384 * 1024 + mem_limits = get_mem_limits_for_regions(arch) + generate_command_stream([op], arch, verbose=False, mem_limits=mem_limits) + # 32K for spilling does not fit, due to the OFM address + arch.sram_size = 32 * 1024 + mem_limits = get_mem_limits_for_regions(arch) + with pytest.raises(VelaError): + generate_command_stream([op], arch, verbose=False, mem_limits=mem_limits) |