diff options
author | Louis Verhaard <louis.verhaard@arm.com> | 2022-01-21 13:38:50 +0100 |
---|---|---|
committer | Louis Verhaard <louis.verhaard@arm.com> | 2022-01-25 11:44:17 +0100 |
commit | e91b531d18373299f6f337b285389b5da11264a0 (patch) | |
tree | 4b80a25e30fe50764561700eed351130c8912a23 | |
parent | 849ff81f82c10a68898e5101930b92372bec5565 (diff) | |
download | ethos-u-vela-e91b531d18373299f6f337b285389b5da11264a0.tar.gz |
MLBEDSW-6018: Fix double buffering on dual core
Only the first half of each weight double buffer was used
on dual-core configurations, which caused degraded performance.
Change-Id: I49972c00343bbffbae28ed11c645e993ed61d43f
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 9abfbd40..6c403c86 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -201,6 +201,11 @@ def get_upscale(op: Operation) -> NpuResamplingMode:
     return upscale
 
 
+def get_double_buffer_offset(arch: ArchitectureFeatures, range_index: int, core: int) -> int:
+    """Returns 0 if the first half of a double buffer should be used, 1 if the second half should be used"""
+    return ((range_index - core) // arch.ncores) % 2
+
+
 def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
     if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
         block = ifm_box.get_block()
@@ -310,8 +315,8 @@ def create_weights(
             if weight_tensor.sub_purpose == TensorSubPurpose.DoubleBuffer:
                 assert weight_tensor != w_tensor_src
                 # Double buffered inside weight_tensor
-                address = weight_tensor.address + w_tensor_src.max_range_bytes * ((weight_range.index - core) % 2)
-                address += core_offset
+                address = weight_tensor.address + core_offset
+                address += get_double_buffer_offset(arch, weight_range.index, core) * w_tensor_src.max_range_bytes
                 core_offset += round_up(weight_range.total_bytes, 16)
             else:
                 if weight_tensor == w_tensor_src:
@@ -522,7 +527,7 @@ def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
 
                     if cmd.out_tensor.sub_purpose == TensorSubPurpose.DoubleBuffer:
                         dest_addr = cmd.out_tensor.address + cmd.in_tensor.max_range_bytes * (
-                            (weight_range.index - core) % 2
+                            get_double_buffer_offset(arch, weight_range.index, core)
                         )
                     else:
                         dest_addr = cmd.out_tensor.address