diff options
author | Diqing Zhong <diqing.zhong@arm.com> | 2020-09-28 18:46:22 +0200 |
---|---|---|
committer | tim.hall <tim.hall@arm.com> | 2020-11-11 11:14:53 +0000 |
commit | 09387e207aa736c464cf95c8a57609aa21b65d44 (patch) | |
tree | d9aed24bb0537473b08611622f32401d24daa786 /ethosu/vela/shared_buffer_allocation.py | |
parent | 897cc14968e017b1f48f376f7f7cefc515c5fe88 (diff) | |
download | ethos-u-vela-09387e207aa736c464cf95c8a57609aa21b65d44.tar.gz |
MLBEDSW-3146: Cycle estimation for conv/pooling ops
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
Change-Id: Ic6ae795a1626d1cdf63a69d2ff86f7cd898f3134
Diffstat (limited to 'ethosu/vela/shared_buffer_allocation.py')
-rw-r--r-- | ethosu/vela/shared_buffer_allocation.py | 12 |
1 file changed, 8 insertions, 4 deletions
```diff
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 484c34b0..51fb1683 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -37,9 +37,6 @@ class SharedBufferAllocation:
         self.banks_required = np.zeros(SharedBufferArea.Size)
 
         ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
-        tensors = [t for t in (ifm_tensor, ifm2_tensor, ofm_tensor) if t is not None]
-        scales = [t.quantization.scale_f32 for t in tensors if t.quantization is not None]
-        has_scale = len(tensors) == len(scales) and None not in scales
 
         self.kernel = Kernel(1, 1)
         self.is_elementwise = ps.npu_block_type == NpuBlockType.ElementWise
@@ -81,7 +78,7 @@ class SharedBufferAllocation:
                     self.ifm_count = 1
 
             if self.ifm_bits == 16:
-                if ps.npu_block_type != NpuBlockType.Pooling and has_scale:
+                if is_acc_40bits_used(ps.npu_block_type, ifm_tensor, ofm_tensor, ifm2_tensor):
                     self.use_accumulator_element = SHRAMElements.Acc40
                 self.use_ifm_element = self.use_ifm_element + 1
                 assert (self.use_ifm_element == SHRAMElements.IFM16) or (
@@ -171,6 +168,13 @@ class SharedBufferAllocation:
         )
 
 
+def is_acc_40bits_used(npu_block_type, ifm_tensor, ofm_tensor, ifm2_tensor=None):
+    tensors = [t for t in (ifm_tensor, ifm2_tensor, ofm_tensor) if t is not None]
+    scales = [t.quantization.scale_f32 for t in tensors if t.quantization is not None]
+    has_scale = len(tensors) == len(scales) and None not in scales
+    return npu_block_type != NpuBlockType.Pooling and has_scale
+
+
 def shared_buffer_allocation_for_pass_and_block_config(arch, ps, block_config):
     alloc = SharedBufferAllocation(arch, ps)
     assert (alloc.ifm_block_depth == block_config[2]) or alloc.is_equal_depth_op
```