diff options
author | Charles Xu <charles.xu@arm.com> | 2020-04-22 08:31:43 +0200 |
---|---|---|
committer | Tim Hall <tim.hall@arm.com> | 2020-06-18 17:53:52 +0100 |
commit | 3e9c4341acf6b8334977288e2079fec0be92ecb3 (patch) | |
tree | 579178aa8d2e69feb7faa5cc3a1b4173b0fe16a2 /ethosu/vela/npu_performance.py | |
parent | 286bd5e1f99242b1f7bf6ba628fc60ac03e91162 (diff) | |
download | ethos-u-vela-3e9c4341acf6b8334977288e2079fec0be92ecb3.tar.gz |
MLBEDSW-1906: Extend IFM to full dimension
Extend IFM to full dimension for the performance
metrics calculation.
Change-Id: Iae923e37280ab0f22b7a272f28970973a5142534
Signed-off-by: Charles Xu <charles.xu@arm.com>
Diffstat (limited to 'ethosu/vela/npu_performance.py')
-rw-r--r-- | ethosu/vela/npu_performance.py | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 32208c9e..eda8e42b 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -259,14 +259,14 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
 
     ifm_tensor, _, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
 
-    npu_convolution_ops = set((NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise))
-    if (npu_block_type == NpuBlockType.Pooling and len(ifm_tensor.shape) == 4) or (
-        npu_block_type in npu_convolution_ops
-    ):
+    if npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise, NpuBlockType.Pooling)):
+        # extent the ifm to full dimension
+        ifm_tensor_brick_size = tuple(numeric_util.full_shape(4, list(ifm_tensor.brick_size), 1))
+        ifm_tensor_shape = numeric_util.full_shape(4, ifm_tensor.shape, 1)
+        ifm_tensor_bandwidth_shape = numeric_util.full_shape(4, ifm_tensor.bandwidth_shape, 1)
 
         batch_size = ifm_tensor.shape[0]
-        ifm_tensor_shape = list(ifm_tensor.shape)
-        ifm_depth = ifm_tensor.bandwidth_shape[3]
+        ifm_depth = ifm_tensor_bandwidth_shape[3]
 
         # add in padding
         ifm_tensor_shape[1] += explicit_padding[0] + explicit_padding[2]  # height += top and bottom
@@ -313,7 +313,7 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
         clamped_skirt[2] = min(clamped_skirt[2], sub_kernel_limits[0] - 1 - clamped_skirt[0])
         clamped_skirt[3] = min(clamped_skirt[3], sub_kernel_limits[1] - 1 - clamped_skirt[1])
         n_blocks, area, block_setup = get_n_blocks_and_area(
-            ifm_tensor.brick_size,
+            ifm_tensor_brick_size,
             ifm_tensor_shape[1:3],
             skirt,
             clamped_skirt,