From 3e9c4341acf6b8334977288e2079fec0be92ecb3 Mon Sep 17 00:00:00 2001
From: Charles Xu <charles.xu@arm.com>
Date: Wed, 22 Apr 2020 08:31:43 +0200
Subject: MLBEDSW-1906: Extend IFM to full dimension

Extend the IFM shape, brick size and bandwidth shape to full (4-D)
dimensions for the performance metrics calculation.

Change-Id: Iae923e37280ab0f22b7a272f28970973a5142534
Signed-off-by: Charles Xu <charles.xu@arm.com>
---
 ethosu/vela/npu_performance.py                   | 14 +++++++-------
 ethosu/vela/numeric_util.py                      |  3 +++
 ethosu/vela/register_command_stream_generator.py | 13 +++++--------
 3 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'ethosu/vela')

diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 32208c9e..eda8e42b 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -259,14 +259,14 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
 
         ifm_tensor, _, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
 
-        npu_convolution_ops = set((NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise))
-        if (npu_block_type == NpuBlockType.Pooling and len(ifm_tensor.shape) == 4) or (
-            npu_block_type in npu_convolution_ops
-        ):
+        if npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise, NpuBlockType.Pooling)):
+            # extend the ifm to full dimension
+            ifm_tensor_brick_size = tuple(numeric_util.full_shape(4, list(ifm_tensor.brick_size), 1))
+            ifm_tensor_shape = numeric_util.full_shape(4, ifm_tensor.shape, 1)
+            ifm_tensor_bandwidth_shape = numeric_util.full_shape(4, ifm_tensor.bandwidth_shape, 1)
 
             batch_size = ifm_tensor.shape[0]
-            ifm_tensor_shape = list(ifm_tensor.shape)
-            ifm_depth = ifm_tensor.bandwidth_shape[3]
+            ifm_depth = ifm_tensor_bandwidth_shape[3]
 
             # add in padding
             ifm_tensor_shape[1] += explicit_padding[0] + explicit_padding[2]  # height += top and bottom
@@ -313,7 +313,7 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
             clamped_skirt[2] = min(clamped_skirt[2], sub_kernel_limits[0] - 1 - clamped_skirt[0])
             clamped_skirt[3] = min(clamped_skirt[3], sub_kernel_limits[1] - 1 - clamped_skirt[1])
             n_blocks, area, block_setup = get_n_blocks_and_area(
-                ifm_tensor.brick_size,
+                ifm_tensor_brick_size,
                 ifm_tensor_shape[1:3],
                 skirt,
                 clamped_skirt,
diff --git a/ethosu/vela/numeric_util.py b/ethosu/vela/numeric_util.py
index 1722adc2..9adf5ecb 100644
--- a/ethosu/vela/numeric_util.py
+++ b/ethosu/vela/numeric_util.py
@@ -85,3 +85,6 @@ def clamp_sigmoid(x):
     else:
         y = 1 / (1 + math.exp(-x))
     return y
+
+def full_shape(dim, shape, fill):
+    return ([fill] * (dim - len(shape))) + shape
\ No newline at end of file
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 14898607..c0402b32 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -46,6 +46,7 @@ from .numeric_util import quantise_float32
 from .numeric_util import round_away_zero
 from .numeric_util import round_up
 from .numeric_util import round_up_to_int
+from .numeric_util import full_shape
 from .operation import NpuBlockType
 from .shared_buffer_allocation import SharedBufferAllocation
 from .tensor import MemArea
@@ -266,10 +267,6 @@ def get_op_kernel(ps):
     return Kernel(k_w, k_h, strides[2], strides[1], dilation[2], dilation[1])
 
 
-def full_shape(shape, fill):
-    return ([fill] * (4 - len(shape))) + shape
-
-
 def has_prev_op_dependency(prev_cmd, cmd):
     if prev_cmd is None:
         return False
@@ -282,14 +279,14 @@ def has_prev_op_dependency(prev_cmd, cmd):
 
 
 def get_op_ofm_rect(cmd):
-    start = full_shape(cmd.ofm_box.start_coord, 0)
-    end = full_shape(cmd.ofm_box.end_coord, 1)
+    start = full_shape(4, cmd.ofm_box.start_coord, 0)
+    end = full_shape(4, cmd.ofm_box.end_coord, 1)
     return Rect(start[-2], start[-3], start[-1], end[-2] - 1, end[-3] - 1, end[-1] - 1)
 
 
 def get_op_ifm_rect(cmd):
-    start = full_shape(cmd.ifm_box.start_coord, 0)
-    end = full_shape(cmd.ifm_box.end_coord, 1)
+    start = full_shape(4, cmd.ifm_box.start_coord, 0)
+    end = full_shape(4, cmd.ifm_box.end_coord, 1)
     return Rect(start[-2], start[-3], start[-1], end[-2] - 1, end[-3] - 1, end[-1] - 1)
 
 
-- 
cgit v1.2.1