diff options
author | Andreas Nevalainen <andreas.nevalainen@arm.com> | 2020-10-14 13:55:43 +0200 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2020-10-19 15:01:27 +0000 |
commit | 6e827082524af57bf04833c30754384b46216e59 (patch) | |
tree | 2e7c621b061c91be19770f85640b2d5da8e6803c /ethosu/vela | |
parent | 1f951fc47abd52db0ac048802dab0c95b149d7b8 (diff) | |
download | ethos-u-vela-6e827082524af57bf04833c30754384b46216e59.tar.gz |
MLBEDSW-3194: Updated elementwise IFM banks count
Signed-off-by: Andreas Nevalainen <andreas.nevalainen@arm.com>
Change-Id: Ie404a0c13e7c7de0eff649f77e0147a0f3d73acd
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/register_command_stream_generator.py | 2 | ||||
-rw-r--r-- | ethosu/vela/shared_buffer_allocation.py | 19 |
2 files changed, 15 insertions, 6 deletions
```diff
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 073b50fb..4f3fe7d4 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -585,7 +585,7 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
                         # Set IFM2_IB_START to the latter half of the IB space
                         ifm_ib_start = shared_buffer.bank_locations[SharedBufferArea.IFM]
                         emit.cmd0_with_param(
-                            cmd0.NPU_SET_IFM2_IB_START, (shram_required - ifm_ib_start) / 2 + ifm_ib_start
+                            cmd0.NPU_SET_IFM2_IB_START, (shram_required - ifm_ib_start) // shared_buffer.ifm_count + ifm_ib_start
                         )
 
                         emit.cmd0_with_param(cmd0.NPU_SET_IFM2_BROADCAST, ifm2_broadcast)
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index aa5f4c86..f52d3a92 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -47,6 +47,7 @@ class SharedBufferAllocation:
         self.kernel = Kernel(1, 1)
         self.is_elementwise = ps.npu_block_type == NpuBlockType.ElementWise
         self.uses_lut = False
+        self.ifm_count = 1
 
         if ps.primary_op:
             strides = ps.primary_op.attrs.get("strides", strides)
@@ -82,11 +83,19 @@ class SharedBufferAllocation:
         if ifm_tensor:
             self.ifm_resampling_mode = ifm_tensor.resampling_mode
             self.ifm_bits = ifm_tensor.dtype.size_in_bits()
-            if ifm_tensor.shape == [] and self.is_elementwise:
-                # Elementwise operator with scalar in ifm, use ifm2 depth
-                self.ifm_depth = ifm2_tensor.shape[-1]
-            else:
+
+            if ifm_tensor.shape != []:
                 self.ifm_depth = ifm_tensor.shape[-1]
+
+            if self.is_elementwise:
+                self.ifm_count = 2
+                if ifm_tensor.shape == []:  # Scalar in ifm1
+                    assert ifm2_tensor
+                    self.ifm_depth = ifm2_tensor.shape[-1]
+                    self.ifm_count = 1
+                elif not ifm2_tensor or ifm2_tensor.shape == []:  # Scalar in ifm2
+                    self.ifm_count = 1
+
             if self.ifm_bits == 16:
                 if ps.npu_block_type != NpuBlockType.Pooling and has_scale:
                     self.use_accumulator_element = SHRAMElements.Acc40
@@ -137,7 +146,7 @@ class SharedBufferAllocation:
         acc_banks = ofm_config.banks[self.use_accumulator_element]
 
         # Update bank counts for IFM and Accumulator
-        self.banks_required[SharedBufferArea.IFM] = ifm_config.banks[self.use_ifm_element]
+        self.banks_required[SharedBufferArea.IFM] = ifm_config.banks[self.use_ifm_element] * self.ifm_count
         self.banks_required[SharedBufferArea.Accumulators] = 0 if self.is_elementwise else acc_banks
 
         # Validating calculates bank layout and returns validity
```