aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mauricio Briceno <mauricio.briceno@arm.com> 2021-03-19 09:13:50 +0100
committer Mauricio Briceno <mauricio.briceno@arm.com> 2021-03-19 10:07:08 +0100
commit a8e48e6792216974d41c7fc69d9406916c153fba (patch)
tree e8431dacc926c5cd522fea5c4ca4e3abfc29e9f6
parent 9b3791817c32529dfeddae57c29c2abe19311fc4 (diff)
download ethos-u-vela-a8e48e6792216974d41c7fc69d9406916c153fba.tar.gz
Address generation fix
- The architecture supports address extensions wider than 32 bits via the cmd1.param field

Change-Id: I7a01b4596f7a54f6be05b8e2c454494e6751757b
Signed-off-by: Mauricio Briceno <mauricio.briceno@arm.com>
-rw-r--r-- ethosu/vela/register_command_stream_generator.py | 29
1 files changed, 15 insertions, 14 deletions
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index d9f6b1f5..ad29dae7 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -183,6 +183,9 @@ class CommandStreamEmitter:
self.cmd_stream.append((command, offset))
self.offset += CommandStreamEmitter.WORD_SIZE * 2
+ def cmd1_with_address(self, cmd: cmd1, offset):
+ self.cmd1_with_offset(cmd, offset, offset >> 32)
+
def cmd_wait(self, cmd: cmd0, channel: int, outstanding_count: int):
param = (16 * channel) + outstanding_count
command = ((param & 0xFFFF) << 16) | cmd.value
@@ -309,10 +312,8 @@ def generate_addresses(emit: CommandStreamEmitter, ptr_cmds: List[cmd1], address
if layout == NpuLayout.NHCWB16:
# Check that all BasePointer addresses are aligned to 16 bytes
assert all((int(addr) % 16) == 0 for addr in addresses)
- emit.cmd1_with_offset(ptr_cmds[0], addresses[0])
- emit.cmd1_with_offset(ptr_cmds[1], addresses[1])
- emit.cmd1_with_offset(ptr_cmds[2], addresses[2])
- emit.cmd1_with_offset(ptr_cmds[3], addresses[3])
+ for i in range(4):
+ emit.cmd1_with_address(ptr_cmds[i], addresses[i])
def generate_tiles(emit: CommandStreamEmitter, tile_cmds: List[cmd0], tiles: NpuTileBox):
@@ -327,9 +328,9 @@ def generate_strides(
):
"""Generates STRIDE_C/Y/X registers"""
strides = get_strides(fm)
- emit.cmd1_with_offset(stride_c_cmd, strides.depth) # stride between 16-byte channel blocks (C)
- emit.cmd1_with_offset(stride_y_cmd, strides.height) # stride between vertical values (H)
- emit.cmd1_with_offset(stride_x_cmd, strides.width) # stride between horisontal values (W)
+ emit.cmd1_with_address(stride_c_cmd, strides.depth) # stride between 16-byte channel blocks (C)
+ emit.cmd1_with_address(stride_y_cmd, strides.height) # stride between vertical values (H)
+ emit.cmd1_with_address(stride_x_cmd, strides.width) # stride between horisontal values (W)
def generate_ifm_precision(emit: CommandStreamEmitter, fm: NpuFeatureMap, op_to_scale: int, precision_cmd: cmd0):
@@ -476,10 +477,10 @@ def generate_weights(emit: CommandStreamEmitter, weights: List[NpuAddressRange],
]
):
if core < len(weights):
- emit.cmd1_with_offset(addr, weights[core].address)
+ emit.cmd1_with_address(addr, weights[core].address)
emit.cmd1_with_offset(length, weights[core].length)
elif core < arch.ncores:
- emit.cmd1_with_offset(addr, weights[0].address)
+ emit.cmd1_with_address(addr, weights[0].address)
emit.cmd1_with_offset(length, 0)
@@ -493,10 +494,10 @@ def generate_biases(emit: CommandStreamEmitter, biases: List[NpuAddressRange], a
[(cmd1.NPU_SET_SCALE_BASE, cmd1.NPU_SET_SCALE_LENGTH), (cmd1.NPU_SET_SCALE1_BASE, cmd1.NPU_SET_SCALE1_LENGTH)]
):
if core < len(biases):
- emit.cmd1_with_offset(addr, biases[core].address)
+ emit.cmd1_with_address(addr, biases[core].address)
emit.cmd1_with_offset(length, biases[core].length)
elif core < arch.ncores:
- emit.cmd1_with_offset(addr, biases[0].address)
+ emit.cmd1_with_address(addr, biases[0].address)
emit.cmd1_with_offset(length, 0)
@@ -875,11 +876,11 @@ def generate_elementwise_op(emit: CommandStreamEmitter, npu_op: NpuElementWiseOp
def generate_dma_op(emit: CommandStreamEmitter, dma_op: NpuDmaOperation):
"""Generates register commands for DMA operations"""
emit.cmd0_with_param(cmd0.NPU_SET_DMA0_SRC_REGION, dma_op.src.region)
- emit.cmd1_with_offset(cmd1.NPU_SET_DMA0_SRC, dma_op.src.address)
+ emit.cmd1_with_address(cmd1.NPU_SET_DMA0_SRC, dma_op.src.address)
emit.cmd0_with_param(cmd0.NPU_SET_DMA0_DST_REGION, dma_op.dest.region)
- emit.cmd1_with_offset(cmd1.NPU_SET_DMA0_DST, dma_op.dest.address)
- emit.cmd1_with_offset(cmd1.NPU_SET_DMA0_LEN, dma_op.src.length)
+ emit.cmd1_with_address(cmd1.NPU_SET_DMA0_DST, dma_op.dest.address)
+ emit.cmd1_with_address(cmd1.NPU_SET_DMA0_LEN, dma_op.src.length)
def generate_registers_for_op(emit: CommandStreamEmitter, npu_op: NpuOperation, arch: ArchitectureFeatures):