author     Diqing Zhong <diqing.zhong@arm.com>    2020-10-13 11:42:37 +0200
committer  tim.hall <tim.hall@arm.com>            2020-11-11 11:14:53 +0000
commit     e5204a6d1837e2d4e9601b4da5a0c47e713257bd (patch)
tree       72a8b1a2931a39391e7851e03f22d2bbcbd23623
parent     42e833d64918b666e81f957c56919d01bb6212cd (diff)
download   ethos-u-vela-e5204a6d1837e2d4e9601b4da5a0c47e713257bd.tar.gz
Vela: Fix perf estimation for conv 1D reshape
Change-Id: I8f139381d0e01e8ac70d89c4a312ee3000fb5fa1
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
-rw-r--r--  ethosu/vela/npu_performance.py | 26
1 file changed, 21 insertions, 5 deletions
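The change below adjusts the cycle estimate for the conv 1D reshape case: when the architecture's ofm_ublock height is 2, the OFM has height 1 and an even width, and the kernel height is 1, the estimate switches to a 4x1 output micro-block with a block height of 1, and rounds the width division up instead of truncating. A minimal sketch of the resulting per-block micro-block count, assuming numeric_util.round_up_divide is plain ceiling division; the helper names here are illustrative only, not Vela's API:

    def round_up_divide(n, d):
        # Ceiling division; assumed to match numeric_util.round_up_divide.
        return (n + d - 1) // d

    def ublocks_per_block(block_w, block_h, block_d, ublk_w, ublk_h, ublk_d):
        # Output micro-blocks covering one block config; the width term is
        # rounded up so a partially filled micro-block still costs a full one.
        return (
            round_up_divide(block_w, ublk_w)
            * (block_h // ublk_h)
            * (block_d // ublk_d)
        )

    # A 1D-reshaped convolution (OFM height 1, even width, kernel height 1)
    # is modelled with a 4x1 micro-block instead of the 2x2 shape:
    print(ublocks_per_block(16, 2, 8, 2, 2, 8))  # 8 micro-blocks, 2x2 ublock
    print(ublocks_per_block(16, 1, 8, 4, 1, 8))  # 4 micro-blocks, 4x1 ublock

With the same number of OFM elements per block, the 4x1 shape covers the single output row in fewer micro-blocks, which is what the estimate in the diff reflects.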
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 1957952..41d75f4 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -283,16 +283,32 @@ def estimate_output_cycles(
def estimate_conv_pooling_cycles(
arch, npu_block_type, primary_op, block_config: Block, block_traversal, kernel_dims, ifm_tensor, ofm_tensor
):
+ ofm_ublock = Block(arch.config.ofm_ublock.width, arch.config.ofm_ublock.height, arch.config.ofm_ublock.depth)
+ ifm_tens_shape = numeric_util.full_shape(4, ifm_tensor.shape, 1)
+ ofm_tens_shape = numeric_util.full_shape(4, ofm_tensor.shape, 1)
+
+ if (
+ arch.config.ofm_ublock.height == 2
+ and npu_block_type
+ in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise, NpuBlockType.VectorProduct)
+ and ofm_tens_shape[1] == 1
+ # Optimisation only applies for even width tensors
+ and ofm_tens_shape[2] % 2 == 0
+ and kernel_dims[0] == 1
+ ):
+ ofm_ublock.width = 4
+ ofm_ublock.height = 1
+ block_config.height = 1
+
num_ublk = (
- (block_config.width // arch.config.ofm_ublock.width)
- * (block_config.height // arch.config.ofm_ublock.height)
- * (block_config.depth // arch.config.ofm_ublock.depth)
+ numeric_util.round_up_divide(block_config.width, ofm_ublock.width)
+ * (block_config.height // ofm_ublock.height)
+ * (block_config.depth // ofm_ublock.depth)
)
num_ofm_blk = 0
total_cycles = 0
num_elems_blk = block_config.width * block_config.height * block_config.depth
- ifm_tens_shape = numeric_util.full_shape(4, ifm_tensor.shape, 1)
- ofm_tens_shape = numeric_util.full_shape(4, ofm_tensor.shape, 1)
+
use_acc_40bits = is_acc_40bits_used(npu_block_type, ifm_tensor, ofm_tensor)
sub_kernel_limits = arch.sub_kernel_limits[npu_block_type]