diff options
Diffstat (limited to 'ethosu/vela/test/test_new_performance.py')
-rw-r--r-- | ethosu/vela/test/test_new_performance.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/ethosu/vela/test/test_new_performance.py b/ethosu/vela/test/test_new_performance.py new file mode 100644 index 00000000..a35905b3 --- /dev/null +++ b/ethosu/vela/test/test_new_performance.py @@ -0,0 +1,78 @@ +# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Description: +# Contains unit tests for new performance estimation code +from ethosu.vela import architecture_allocator +from ethosu.vela import architecture_features +from ethosu.vela import npu_performance +from ethosu.vela import operation +from ethosu.vela.architecture_features import resampling_mode +from ethosu.vela.shape4d import Shape4D +from ethosu.vela.shape4d import VolumeIterator +from ethosu.vela.tensor import MemArea + + +def test_new_performance(): + arch = architecture_features.create_default_arch(architecture_features.Accelerator.Ethos_U55_128) + + query = npu_performance.PerformanceQuery(architecture_features.NpuBlockType.ConvolutionMxN) + query.ifm_shape = Shape4D(1, 16, 16, 16) + query.ifm2_shape = Shape4D() + query.ifm_memory_area = MemArea.Sram + query.ifm_bits = 8 + query.ofm_shape = Shape4D(1, 16, 16, 1) + query.ofm_memory_area = MemArea.Sram + query.ofm_bits = 8 + query.const_shape = Shape4D(1, 1, 1, query.ofm_shape.depth) + query.const_memory_area = MemArea.OffChipFlash + query.kernel = operation.Kernel(1, 1, 1, 1, 1, 1, valid_padding=False) + query.config = architecture_allocator.find_block_config( + arch, + architecture_features.NpuBlockType.ConvolutionMxN, + Shape4D(1, 16, 16, 1), + query.ifm_shape, + None, + False, + 8, + query.kernel, + 0, + False, + resampling_mode.NONE, + ) + + print("For block Config = {}".format(query.config)) + + # -s to display output + for sub_shape in [Shape4D(1, 4, 8, 16), Shape4D(1, 8, 8, 16), Shape4D(1, 8, 16, 16), query.ofm_shape]: + print("\n-- Subshape = {}".format(sub_shape)) + iterator = VolumeIterator(query.ofm_shape, sub_shape) + a = npu_performance.ElementAccess() + c = npu_performance.CycleCost() + for pos, shape in iterator: + print("\tpos = {} shape = {}".format(pos, shape)) + ta, tc = npu_performance.measure_performance_cost( + arch, operation.Op.Conv2D, operation.Op.Relu, query, pos, shape + ) + a += ta + c += tc + print("\t\taccess: {}".format(ta)) + print("\t\tcycles: {}".format(tc)) + print("\tAccess: {}".format(a)) + print("\tCycles: {}".format(c)) + assert c.op_macs == 4096 + + assert True # Any successful result is okay |