diff options
Diffstat (limited to 'tests/benchmark')
44 files changed, 0 insertions, 5834 deletions
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp deleted file mode 100644 index 52a357b2a6..0000000000 --- a/tests/benchmark/CL/ActivationLayer.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/ActivationLayer.h" - -namespace -{ -using ActivationLayerAlexNet = ActivationLayer<AlexNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -} // namespace - -BENCHMARK_DEFINE_F(ActivationLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) 
-->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>); diff --git a/tests/benchmark/CL/BitwiseAnd.cpp b/tests/benchmark/CL/BitwiseAnd.cpp deleted file mode 100644 index 4858c73948..0000000000 --- a/tests/benchmark/CL/BitwiseAnd.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" - -#include "benchmark/benchmark_api.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -namespace -{ -template <typename DataSet> -class BitwiseAnd : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - ::benchmark::Fixture::SetUp(state); - - profiler.add(std::make_shared<WallClockTimer>()); - - const std::string image_name = *(DataSet().begin() + state.range(0)); - const RawTensor &raw = library->get(image_name); - - // Create tensors - src1 = create_tensor<CLTensor>(raw.shape(), DataType::U8); - src2 = create_tensor<CLTensor>(raw.shape(), DataType::U8); - dst = create_tensor<CLTensor>(raw.shape(), DataType::U8); - - // Create and configure function - band.configure(&src1, &src2, &dst); - - // Allocate tensors - src1.allocator()->allocate(); - src2.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill source tensors - library->fill(CLAccessor(src1), image_name, Channel::R); - library->fill(CLAccessor(src2), image_name, Channel::G); - } - - void TearDown(::benchmark::State &state) override - { - profiler.submit(state); - - ::benchmark::Fixture::TearDown(state); - } - - CLBitwiseAnd band{}; - Profiler profiler{}; - -private: - CLTensor src1{}; - CLTensor src2{}; - CLTensor dst{}; -}; - -using BitwiseAndSmall = BitwiseAnd<SmallImages>; -using BitwiseAndLarge = BitwiseAnd<LargeImages>; -} // 
namespace - -BENCHMARK_DEFINE_F(BitwiseAndSmall, cl_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndSmall, cl_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<SmallImages>); - -BENCHMARK_DEFINE_F(BitwiseAndLarge, cl_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndLarge, cl_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<LargeImages>); diff --git a/tests/benchmark/CL/CMakeLists.txt b/tests/benchmark/CL/CMakeLists.txt deleted file mode 100644 index 8493309f40..0000000000 --- a/tests/benchmark/CL/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2017 ARM Limited. -# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -cmake_minimum_required (VERSION 3.1) - -include_directories(${CMAKE_SOURCE_DIR}/../include) - -set(arm_compute_test_benchmark_TARGET_DEFINITIONS - ${arm_compute_test_benchmark_TARGET_DEFINITIONS} - -DOPENCL - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_TARGET_INCLUDES - ${arm_compute_test_benchmark_TARGET_INCLUDES} - ${CMAKE_SOURCE_DIR}/../include - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_OPENCL_SOURCE_FILES - ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h - ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp -) - -add_library(arm_compute_test_benchmark_OPENCL OBJECT - ${arm_compute_test_benchmark_OPENCL_SOURCE_FILES} -) - -set(arm_compute_test_benchmark_TARGET_OBJECTS - ${arm_compute_test_benchmark_TARGET_OBJECTS} - $<TARGET_OBJECTS:arm_compute_test_benchmark_OPENCL> - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_TARGET_LIBRARIES - ${arm_compute_test_benchmark_TARGET_LIBRARIES} - OpenCL - PARENT_SCOPE -) diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp deleted file mode 100644 index e790273f9c..0000000000 --- a/tests/benchmark/CL/ConvolutionLayer.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/ConvolutionLayer.h" - -namespace -{ -using ConvolutionLayerAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, CLTensor, CLAccessor, CLConvolutionLayer>; -} // namespace - -BENCHMARK_DEFINE_F(ConvolutionLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, 
cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) 
-->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>); 
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 
8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>); diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp deleted file mode 100644 index fb8e1bc09f..0000000000 --- a/tests/benchmark/CL/FullyConnectedLayer.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" - -#include "benchmark/benchmark_api.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/FullyConnectedLayer.h" - -namespace -{ -using FullyConnectedLayerAlexNet = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -} // namespace - -BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - 
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>); diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp deleted file mode 100644 index 87dad05f30..0000000000 --- a/tests/benchmark/CL/GEMM.cpp +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/CL/GEMM.h" - -namespace -{ -using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -} // namespace - -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) 
-->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, 
cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); 
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) 
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, 
cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); 
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) 
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) 
-->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); 
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); - -BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); diff --git a/tests/benchmark/CL/GEMM.h b/tests/benchmark/CL/GEMM.h deleted file mode 100644 index ca3d9ad594..0000000000 --- a/tests/benchmark/CL/GEMM.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/GEMMDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -// FIXME: Merge with NEON/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type> -class GEMM : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32, "Unsupported data type for GEMM operation"); - - profiler.add(std::make_shared<WallClockTimer>()); - - const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); - - TensorShape shape_a = gemm_obj.shape_a; - TensorShape shape_b = gemm_obj.shape_b; - TensorShape shape_c = gemm_obj.shape_c; - TensorShape shape_d = gemm_obj.shape_d; - - // Create tensors - a = create_tensor<CLTensor>(shape_a, data_type); - b = create_tensor<CLTensor>(shape_b, data_type); - c = create_tensor<CLTensor>(shape_c, data_type); - d = create_tensor<CLTensor>(shape_d, data_type); - - // Create and configure function - gemm_layer = std::unique_ptr<Function>(new Function()); - gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); - - // Allocate tensors - a.allocator()->allocate(); - b.allocator()->allocate(); - c.allocator()->allocate(); - d.allocator()->allocate(); - } - - void 
TearDown(::benchmark::State &state) override - { - gemm_layer.reset(); - - a.allocator()->free(); - b.allocator()->free(); - c.allocator()->free(); - d.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> gemm_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType a{}; - TensorType b{}; - TensorType c{}; - TensorType d{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp deleted file mode 100644 index 28f89dce1f..0000000000 --- a/tests/benchmark/CL/NormalizationLayer.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/NormalizationLayer.h" - -namespace -{ -using NormalizationLayerAlexNet = NormalizationLayer<AlexNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; -using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; - -} // namespace - -BENCHMARK_DEFINE_F(NormalizationLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>); 
-BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>); diff --git a/tests/benchmark/CL/PoolingLayer.cpp b/tests/benchmark/CL/PoolingLayer.cpp deleted file mode 100644 index 05764a412a..0000000000 --- a/tests/benchmark/CL/PoolingLayer.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/PoolingLayer.h" - -namespace -{ -using PoolingLayerAlexNet = PoolingLayer<AlexNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -} // namespace - -BENCHMARK_DEFINE_F(PoolingLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - 
-BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 
4, 8>); diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt deleted file mode 100644 index 115333a1b0..0000000000 --- a/tests/benchmark/CMakeLists.txt +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2017 ARM Limited. -# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-cmake_minimum_required (VERSION 3.1) - -add_library(benchmark STATIC IMPORTED) -set_target_properties(benchmark PROPERTIES - IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libbenchmark.a" -) - -add_library(OpenCL SHARED IMPORTED) -set_target_properties(OpenCL PROPERTIES - IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../build/opencl-1.2-stubs/libOpenCL.so" - IMPORTED_NO_SONAME 1 -) - -option(ENABLE_PMU_COUNTER "Compile with PMU counter support") - -set(ARM_COMPUTE_TARGETS_TO_MEASURE "all" CACHE STRING "Semicolon-separated list of targets to include in validation.") - -set(ARM_COMPUTE_ALL_TARGETS - NEON - CL -) - -if(ARM_COMPUTE_TARGETS_TO_MEASURE STREQUAL "all") - set(ARM_COMPUTE_TARGETS_TO_MEASURE ${ARM_COMPUTE_ALL_TARGETS}) -endif() - -list(REMOVE_DUPLICATES ARM_COMPUTE_TARGETS_TO_MEASURE) - -foreach(TARGET ${ARM_COMPUTE_TARGETS_TO_MEASURE}) - list(FIND ARM_COMPUTE_ALL_TARGETS ${TARGET} idx) - - if(${idx} LESS 0) - message(FATAL_ERROR "The target '${TARGET}' does not exist. 
It should be one of\n${ARM_COMPUTE_ALL_TARGETS}") - else() - add_subdirectory(${TARGET}) - endif() -endforeach() - -set(arm_compute_test_benchmark_SOURCE_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Datasets.h - ${CMAKE_CURRENT_SOURCE_DIR}/Instrument.h - ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.h - ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.h - ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.h - ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.h - ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.cpp -) - -if(${ENABLE_PMU_COUNTER}) - list(APPEND arm_compute_test_benchmark_SOURCE_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.h - ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.cpp - ) -endif() - -add_library(arm_compute_test_benchmark OBJECT - ${arm_compute_test_benchmark_SOURCE_FILES} -) - -add_definitions(${arm_compute_test_benchmark_TARGET_DEFINITIONS}) -include_directories(${arm_compute_test_benchmark_TARGET_INCLUDES}) - -add_executable(arm_compute_benchmark - $<TARGET_OBJECTS:arm_compute_test_benchmark> - ${arm_compute_test_benchmark_TARGET_OBJECTS} - $<TARGET_OBJECTS:tensor_library> - $<TARGET_OBJECTS:arm_compute_test> -) - -target_link_libraries(arm_compute_benchmark - benchmark - boost_program_options - arm_compute - ${arm_compute_test_benchmark_TARGET_LIBRARIES} -) diff --git a/tests/benchmark/Datasets.h b/tests/benchmark/Datasets.h deleted file mode 100644 index cb45b50589..0000000000 --- a/tests/benchmark/Datasets.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__ - -#include "dataset/ActivationLayerDataset.h" -#include "dataset/BorderModeDataset.h" -#include "dataset/ConvolutionLayerDataset.h" -#include "dataset/DataTypeDatasets.h" -#include "dataset/FullyConnectedLayerDataset.h" -#include "dataset/GEMMDataset.h" -#include "dataset/ImageDatasets.h" -#include "dataset/InterpolationPolicyDataset.h" -#include "dataset/NormalizationLayerDataset.h" -#include "dataset/PoolingLayerDataset.h" -#include "dataset/ShapeDatasets.h" - -#include "benchmark/benchmark_api.h" - -#include <array> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, int N> -void DataSetArg(::benchmark::internal::Benchmark *b) -{ - b->Arg(N); - b->ArgName(std::string(*(DataSet().begin() + N))); -} - -template <typename DataSet, int N, unsigned int... Args> -void DataSetArgBatched(::benchmark::internal::Benchmark *b) -{ - constexpr std::array<unsigned int, sizeof...(Args)> batches{ { Args... } }; - for(const auto &el : batches) - { - b->Args({ N, static_cast<int>(el) }); - } - b->ArgNames({ std::string(*(DataSet().begin() + N)), "batch_size" }); -} - -template <typename DataSet> -void DataSetArgs(::benchmark::internal::Benchmark *b) -{ - for(size_t i = 0; i < DataSet().size(); ++i) - { - b->Arg(i); - b->ArgName(*(DataSet().begin() + i)); - } -} -} -} -} -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__ */ diff --git a/tests/benchmark/Instrument.h b/tests/benchmark/Instrument.h deleted file mode 100644 index 5034471091..0000000000 --- a/tests/benchmark/Instrument.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__ - -#include "Utils.h" - -#include <memory> -#include <string> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -/** Interface for classes that can be used to measure performance. */ -class Instrument -{ -public: - /** Interface defining a measurement, e.g. time, cycles, ... */ - class IMeasurement - { - public: - IMeasurement() = default; - IMeasurement(const IMeasurement &) = default; - IMeasurement(IMeasurement &&) = default; - IMeasurement &operator=(const IMeasurement &) = default; - IMeasurement &operator=(IMeasurement &&) = default; - virtual ~IMeasurement() = default; - - virtual operator double() const = 0; - }; - - /** Implementation of a Measurement class for arihtmetic types. 
*/ - template <typename T> - class Measurement : public IMeasurement - { - public: - /** Store the given value as measurement. - * - * @param[in] value Measured value. - */ - Measurement(T value); - - operator double() const override; - - private: - T _value; - }; - - Instrument() = default; - Instrument(const Instrument &) = default; - Instrument(Instrument &&) = default; - Instrument &operator=(const Instrument &) = default; - Instrument &operator=(Instrument &&) = default; - virtual ~Instrument() = default; - - /** Identifier for the instrument */ - virtual std::string id() const = 0; - - /** Start measuring. */ - virtual void start() = 0; - - /** Stop measuring. */ - virtual void stop() = 0; - - /** Return the latest measurement. */ - virtual std::unique_ptr<IMeasurement> get_measurement() const = 0; -}; - -template <typename T> -Instrument::Measurement<T>::Measurement(T value) - : _value{ value } -{ -} - -template <typename T> -Instrument::Measurement<T>::operator double() const -{ - return _value; -} -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__ */ diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp deleted file mode 100644 index 9ea675e515..0000000000 --- a/tests/benchmark/NEON/ActivationLayer.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/ActivationLayer.h" - -namespace -{ -using ActivationLayerAlexNetF32 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer>; -using ActivationLayerAlexNetQS8 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::QS8>; -using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>; -using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(ActivationLayerAlexNetF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) -->Threads(1) 
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); - -// QS8 -BENCHMARK_DEFINE_F(ActivationLayerAlexNetQS8, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerLeNet5, neon_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>); diff --git a/tests/benchmark/NEON/BitwiseAnd.cpp b/tests/benchmark/NEON/BitwiseAnd.cpp deleted file mode 100644 index be68fd39e4..0000000000 --- a/tests/benchmark/NEON/BitwiseAnd.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -namespace -{ -template <typename DataSet> -class BitwiseAnd : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const std::string image_name = *(DataSet().begin() + state.range(0)); - const RawTensor &raw = library->get(image_name); - - // Create tensors - src1 = create_tensor<Tensor>(raw.shape(), DataType::U8); - src2 = create_tensor<Tensor>(raw.shape(), DataType::U8); - dst = create_tensor<Tensor>(raw.shape(), DataType::U8); - - // Create and configure function - band.configure(&src1, &src2, &dst); - - // Allocate tensors - src1.allocator()->allocate(); - src2.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill source tensors - library->fill(NEAccessor(src1), image_name, Channel::R); - library->fill(NEAccessor(src2), image_name, Channel::G); - } - - void TearDown(::benchmark::State &state) override - { - profiler.submit(state); - } - - NEBitwiseAnd band{}; - Profiler profiler{}; - -private: - Tensor src1{}; - Tensor src2{}; - Tensor 
dst{}; -}; - -using BitwiseAndSmall = BitwiseAnd<SmallImages>; -using BitwiseAndLarge = BitwiseAnd<LargeImages>; -} // namespace - -BENCHMARK_DEFINE_F(BitwiseAndSmall, neon_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndSmall, neon_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<SmallImages>); - -BENCHMARK_DEFINE_F(BitwiseAndLarge, neon_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndLarge, neon_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<LargeImages>); diff --git a/tests/benchmark/NEON/CMakeLists.txt b/tests/benchmark/NEON/CMakeLists.txt deleted file mode 100644 index 2cb3eb36c9..0000000000 --- a/tests/benchmark/NEON/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2017 ARM Limited. -# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -cmake_minimum_required (VERSION 3.1) - -set(arm_compute_test_benchmark_NEON_SOURCE_FILES - ${CMAKE_SOURCE_DIR}/NEON/NEAccessor.h - ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp -) - -add_library(arm_compute_test_benchmark_NEON OBJECT - ${arm_compute_test_benchmark_NEON_SOURCE_FILES} -) - -SET(arm_compute_test_benchmark_TARGET_OBJECTS - ${arm_compute_test_benchmark_TARGET_OBJECTS} - $<TARGET_OBJECTS:arm_compute_test_benchmark_NEON> - PARENT_SCOPE -) diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp deleted file mode 100644 index a0b1236177..0000000000 --- a/tests/benchmark/NEON/ConvolutionLayer.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/ConvolutionLayer.h" - -namespace -{ -using ConvolutionLayerAlexNetF32 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>; -using ConvolutionLayerAlexNetQS8 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer, DataType::QS8>; -using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>; -using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, Tensor, NEAccessor, NEConvolutionLayer>; -using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, Tensor, NEAccessor, NEConvolutionLayer>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 
8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); - -// QS8 -BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, 
neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, 
neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>); 
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>); 
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>); diff --git a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp deleted file mode 100644 index bbe5707e41..0000000000 --- a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" -#include "dataset/ConvolutionLayerDataset.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/ConvolutionLayer.h" - -namespace -{ -using ConvolutionLayerDirectAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEDirectConvolutionLayer>; -} // namespace - -BENCHMARK_DEFINE_F(ConvolutionLayerDirectAlexNet, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - profiler.stop(); - } -} - -// Registr only the 3x3 convolution layers -BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp deleted file mode 100644 index 15c2a407e6..0000000000 --- a/tests/benchmark/NEON/FullyConnectedLayer.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/FullyConnectedLayer.h" - -namespace -{ -using FullyConnectedLayerAlexNetF32 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; -using FullyConnectedLayerAlexNetQS8 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer, DataType::QS8>; -using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; -using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); - -// QS8 
-BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, neon_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - profiler.stop(); - } -} -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - profiler.stop(); - } -} -BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>); diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp deleted file mode 100644 index 1655c32e2b..0000000000 --- a/tests/benchmark/NEON/GEMM.cpp +++ /dev/null @@ -1,777 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/NEON/GEMM.h" - -namespace -{ -#ifdef ENABLE_FP16 -using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F16>; -using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F16>; -using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F16>; -#endif /* ENABLE_FP16 */ -using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F32>; -using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F32>; -using GEMMQS8GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::QS8>; -using GEMMQS8GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::QS8>; -using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F32>; -using QS8MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::QS8>; -} // namespace -#ifdef ENABLE_FP16 -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - 
profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); 
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) 
-->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); 
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(FP16MatrixMultiply, neon_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - 
gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); -#endif /* ENABLE_FP16 */ - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, 
neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); 
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) 
-->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet1, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet2, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); 
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); 
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(FP32MatrixMultiply, neon_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); - -BENCHMARK_DEFINE_F(QS8MatrixMultiply, neon_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) -->Threads(1) 
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>); diff --git a/tests/benchmark/NEON/GEMM.h b/tests/benchmark/NEON/GEMM.h deleted file mode 100644 index 4f0557d610..0000000000 --- a/tests/benchmark/NEON/GEMM.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/GEMMDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -// FIXME: Merge with CL/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type> -class GEMM : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { -#ifdef ENABLE_FP16 - ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); -#else /* ENABLE_FP16 */ - ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); -#endif /* ENABLE_FP16 */ - - profiler.add(std::make_shared<WallClockTimer>()); - - const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); - - TensorShape shape_a = gemm_obj.shape_a; - TensorShape shape_b = gemm_obj.shape_b; - TensorShape shape_c = gemm_obj.shape_c; - TensorShape shape_d = gemm_obj.shape_d; - - // Create tensors - a = create_tensor<Tensor>(shape_a, data_type, 1, 4); - b = create_tensor<Tensor>(shape_b, data_type, 1, 4); - c = create_tensor<Tensor>(shape_c, data_type, 1, 4); - d = create_tensor<Tensor>(shape_d, data_type, 1, 4); - - // Create and configure function - gemm_layer = std::unique_ptr<Function>(new Function()); - gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); - - // Allocate tensors - a.allocator()->allocate(); - b.allocator()->allocate(); - c.allocator()->allocate(); - d.allocator()->allocate(); - } - - void TearDown(::benchmark::State &state) override - 
{ - gemm_layer.reset(); - - a.allocator()->free(); - b.allocator()->free(); - c.allocator()->free(); - d.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> gemm_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType a{}; - TensorType b{}; - TensorType c{}; - TensorType d{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp deleted file mode 100644 index 8ea8c95cda..0000000000 --- a/tests/benchmark/NEON/NormalizationLayer.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/NormalizationLayer.h" - -namespace -{ -using NormalizationLayerAlexNetF32 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>; -using NormalizationLayerAlexNetQS8 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer, DataType::QS8>; -using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(NormalizationLayerAlexNetF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); - -// QS8 -BENCHMARK_DEFINE_F(NormalizationLayerAlexNetQS8, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) -->Threads(1) 
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>); diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp deleted file mode 100644 index dfa75497cd..0000000000 --- a/tests/benchmark/NEON/PoolingLayer.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/common/PoolingLayer.h" - -namespace -{ -using PoolingLayerAlexNetF32 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; -using PoolingLayerAlexNetQS8 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer, DataType::QS8>; -using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; -using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(PoolingLayerAlexNetF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) 
-->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); - -// QS8 -BENCHMARK_DEFINE_F(PoolingLayerAlexNetQS8, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerLeNet5, neon_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, neon_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - profiler.stop(); - } -} - -// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) 
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 4, 8>); diff --git a/tests/benchmark/PMUCounter.cpp b/tests/benchmark/PMUCounter.cpp deleted file mode 100644 index 6d59dae65d..0000000000 --- a/tests/benchmark/PMUCounter.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "PMUCounter.h" - -#include "Utils.h" -#include "support/ToolchainSupport.h" - -#define _GNU_SOURCE 1 -#include <asm/unistd.h> -#include <csignal> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <fcntl.h> -#include <linux/hw_breakpoint.h> -#include <linux/perf_event.h> -#include <stdexcept> -#include <sys/ioctl.h> -#include <unistd.h> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -CycleCounter::CycleCounter() -{ - const pid_t pid = getpid(); - - struct perf_event_attr perf_config - { - }; - memset(&perf_config, 0, sizeof(struct perf_event_attr)); - - perf_config.config = PERF_COUNT_HW_CPU_CYCLES; - perf_config.size = sizeof(struct perf_event_attr); - perf_config.type = PERF_TYPE_HARDWARE; - // The inherit bit specifies that this counter should count events of child - // tasks as well as the task specified - perf_config.inherit = 1; - // Enables saving of event counts on context switch for inherited tasks - perf_config.inherit_stat = 1; - - _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); - - if(_fd < 0) - { - throw std::runtime_error("perf_event_open for cycles failed"); - } -} - -std::string CycleCounter::id() const -{ - return "Cycle Counter"; -} - -void CycleCounter::start() -{ - ioctl(_fd, PERF_EVENT_IOC_RESET, 0); - ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); -} - -void CycleCounter::stop() -{ - ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); - read(_fd, &_cycles, sizeof(_cycles)); -} - 
-std::unique_ptr<Instrument::IMeasurement> CycleCounter::get_measurement() const -{ - return support::cpp14::make_unique<Instrument::Measurement<long long>>(_cycles); -} - -InstructionCounter::InstructionCounter() -{ - const pid_t pid = getpid(); - - struct perf_event_attr perf_config - { - }; - memset(&perf_config, 0, sizeof(struct perf_event_attr)); - - perf_config.config = PERF_COUNT_HW_INSTRUCTIONS; - perf_config.size = sizeof(struct perf_event_attr); - perf_config.type = PERF_TYPE_HARDWARE; - // The inherit bit specifies that this counter should count events of child - // tasks as well as the task specified - perf_config.inherit = 1; - // Enables saving of event counts on context switch for inherited tasks - perf_config.inherit_stat = 1; - - _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); - - if(_fd < 0) - { - throw std::runtime_error("perf_event_open for instructions failed"); - } -} - -std::string InstructionCounter::id() const -{ - return "Instruction Counter"; -} - -void InstructionCounter::start() -{ - ioctl(_fd, PERF_EVENT_IOC_RESET, 0); - ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); -} - -void InstructionCounter::stop() -{ - ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); - read(_fd, &_instructions, sizeof(_instructions)); -} - -std::unique_ptr<Instrument::IMeasurement> InstructionCounter::get_measurement() const -{ - return std::unique_ptr<Instrument::IMeasurement>(new Instrument::Measurement<long long>(_instructions)); -} -} // namespace benchmark -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/PMUCounter.h b/tests/benchmark/PMUCounter.h deleted file mode 100644 index c04b0e5760..0000000000 --- a/tests/benchmark/PMUCounter.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__ - -#include "Instrument.h" - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -/** Implementation of an instrument to count CPU cycles. */ -class CycleCounter : public Instrument -{ -public: - /** Initialise the cycle counter. */ - CycleCounter(); - - std::string id() const override; - void start() override; - void stop() override; - std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; - -private: - long _fd{ -1 }; - long long _cycles{ 0 }; -}; - -/** Implementation of an instrument to count executed CPU instructions. */ -class InstructionCounter : public Instrument -{ -public: - /** Initialise the instruction counter. 
*/ - InstructionCounter(); - - std::string id() const override; - void start() override; - void stop() override; - std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; - -private: - long _fd{ -1 }; - long long _instructions{ 0 }; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__ */ diff --git a/tests/benchmark/PerformanceProgramOptions.cpp b/tests/benchmark/PerformanceProgramOptions.cpp deleted file mode 100644 index b4becc3c69..0000000000 --- a/tests/benchmark/PerformanceProgramOptions.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "PerformanceProgramOptions.h" - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Weffc++" -#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" -#pragma GCC diagnostic ignored "-Wctor-dtor-privacy" -#include "boost/program_options.hpp" -#pragma GCC diagnostic pop - -namespace arm_compute -{ -namespace test -{ -namespace performance -{ -PerformanceProgramOptions::PerformanceProgramOptions() -{ - boost::program_options::options_description options("Performance options"); - options.add_options()("runs", boost::program_options::value<unsigned int>()->default_value(1), "Repetitions per test"); - options.add_options()("threads", boost::program_options::value<unsigned int>()->default_value(1), "Number of parallel CPU threads"); - add_options(options); -} -} // namespace performance -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/PerformanceProgramOptions.h b/tests/benchmark/PerformanceProgramOptions.h deleted file mode 100644 index e9c7a38b3f..0000000000 --- a/tests/benchmark/PerformanceProgramOptions.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__ -#define __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__ - -#include "ProgramOptions.h" - -namespace arm_compute -{ -namespace test -{ -namespace performance -{ -/** Subclass of @ref ProgramOptions that adds performance-specific options. */ -class PerformanceProgramOptions : public ProgramOptions -{ -public: - /** Defines the additional options "runs" and "threads". */ - PerformanceProgramOptions(); -}; -} // namespace performance -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__ */ diff --git a/tests/benchmark/PerformanceUserConfiguration.cpp b/tests/benchmark/PerformanceUserConfiguration.cpp deleted file mode 100644 index ca412d660a..0000000000 --- a/tests/benchmark/PerformanceUserConfiguration.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "PerformanceUserConfiguration.h" - -#include "ProgramOptions.h" - -namespace arm_compute -{ -namespace test -{ -namespace performance -{ -PerformanceUserConfiguration::PerformanceUserConfiguration(const ProgramOptions &options) - : UserConfiguration(options) -{ - unsigned int tmp_runs = 0; - if(options.get("runs", tmp_runs)) - { - runs = tmp_runs; - } -} -} // namespace performance -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/PerformanceUserConfiguration.h b/tests/benchmark/PerformanceUserConfiguration.h deleted file mode 100644 index daf85a4332..0000000000 --- a/tests/benchmark/PerformanceUserConfiguration.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ -#define __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ - -#include "UserConfiguration.h" - -namespace arm_compute -{ -namespace test -{ -class ProgramOptions; - -namespace performance -{ -/** Specialisation of @ref UserConfiguration to provide performance-specific - * configuration options. - */ -struct PerformanceUserConfiguration : public UserConfiguration -{ - /** Create a configuration in which no performance option has been assigned. */ PerformanceUserConfiguration() = default; - - /** Initialise the configuration according to the program options. - * - * NOTE(review): this constructor is not marked explicit, so a ProgramOptions - * object converts implicitly to a configuration; confirm that is intended. - * - * @param[in] options Parsed command line options. - */ - PerformanceUserConfiguration(const ProgramOptions &options); - - Option<unsigned int> runs{}; /**< Repetitions per test; assigned from the "runs" program option when it is available. */ -}; -} // namespace performance - -extern performance::PerformanceUserConfiguration user_config; -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ */ diff --git a/tests/benchmark/Profiler.cpp b/tests/benchmark/Profiler.cpp deleted file mode 100644 index f3ce94164f..0000000000 --- a/tests/benchmark/Profiler.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Profiler.h" - -#include <iostream> -#include <utility> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -void Profiler::add(const std::shared_ptr<Instrument> &instrument) -{ - _instruments.push_back(instrument); -} - -void Profiler::start() -{ - for(auto &instrument : _instruments) - { - instrument->start(); - } -} - -void Profiler::stop() -{ - for(auto &instrument : _instruments) - { - instrument->stop(); - } - - for(const auto &instrument : _instruments) - { - _measurements[instrument->id()].push_back(*instrument->get_measurement()); - } -} - -void Profiler::submit(::benchmark::State &state) -{ - for(auto &instrument : _measurements) - { - double sum_values = std::accumulate(instrument.second.begin(), instrument.second.end(), 0.); - size_t num_values = instrument.second.size(); - - if(num_values > 2) - { - auto minmax_values = std::minmax_element(instrument.second.begin(), instrument.second.end()); - state.counters[instrument.first + "_min"] = *minmax_values.first; - state.counters[instrument.first + "_max"] = *minmax_values.second; - sum_values -= *minmax_values.first + *minmax_values.second; - num_values -= 2; - } - state.counters[instrument.first] = sum_values / num_values; - instrument.second.clear(); - } -} - -const Profiler::MeasurementsMap &Profiler::measurements() const -{ - return _measurements; -} -} // namespace benchmark -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/Profiler.h b/tests/benchmark/Profiler.h deleted file mode 100644 index f2464949d0..0000000000 --- a/tests/benchmark/Profiler.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__ - -#include "Instrument.h" - -#include "benchmark/benchmark_api.h" - -#include <map> -#include <memory> -#include <string> -#include <vector> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -/** Drives a list of instruments and keeps the measurements they produce, grouped by instrument id. */ class Profiler -{ -public: - /** Mapping from instrument ids to their measurements. */ - using MeasurementsMap = std::map<std::string, std::vector<double>>; - - /** Add @p instrument to the performance monitor. - * - * All added instruments will be used when @ref start or @ref stop are - * called to make measurements. - * - * @param[in] instrument Instrument to be used to measure performance. - */ - void add(const std::shared_ptr<Instrument> &instrument); - - /** Start all added instruments to measure performance. 
*/ - void start(); - - /** Stop all added instruments. */ - void stop(); - - /** Commit all measured values to the current active test. - * - * @param[in, out] state Benchmark state that receives the values. - */ - void submit(::benchmark::State &state); - - /** Return measurements for all instruments. */ - const MeasurementsMap &measurements() const; - -private: - std::vector<std::shared_ptr<Instrument>> _instruments{}; /**< Instruments registered through @ref add. */ - MeasurementsMap _measurements{}; /**< Measurements keyed by instrument id. */ -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__ */ diff --git a/tests/benchmark/WallClockTimer.cpp b/tests/benchmark/WallClockTimer.cpp deleted file mode 100644 index 717fe04b31..0000000000 --- a/tests/benchmark/WallClockTimer.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "WallClockTimer.h" - -#include "Utils.h" -#include "support/ToolchainSupport.h" - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -std::string WallClockTimer::id() const -{ - return "Wall clock"; -} - -void WallClockTimer::start() -{ - _start = std::chrono::high_resolution_clock::now(); -} - -void WallClockTimer::stop() -{ - _stop = std::chrono::high_resolution_clock::now(); -} - -std::unique_ptr<Instrument::IMeasurement> WallClockTimer::get_measurement() const -{ - const std::chrono::duration<float, std::milli> delta = _stop - _start; - return support::cpp14::make_unique<Instrument::Measurement<float>>(delta.count()); -} -} // namespace benchmark -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/WallClockTimer.h b/tests/benchmark/WallClockTimer.h deleted file mode 100644 index 85a8b86213..0000000000 --- a/tests/benchmark/WallClockTimer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__ - -#include "Instrument.h" - -#include <chrono> - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -/** Implementation of an instrument to measure elapsed wall-clock time in milliseconds. */ -class WallClockTimer : public Instrument -{ -public: - std::string id() const override; - void start() override; - void stop() override; - std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; - -private: - std::chrono::high_resolution_clock::time_point _start{}; /**< Beginning of the measured interval; value-initialised until the timer is started. */ - std::chrono::high_resolution_clock::time_point _stop{}; /**< End of the measured interval; value-initialised until the timer is stopped. */ -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__ */ diff --git a/tests/benchmark/common/ActivationLayer.h b/tests/benchmark/common/ActivationLayer.h deleted file mode 100644 index 8e22281025..0000000000 --- a/tests/benchmark/common/ActivationLayer.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/ActivationLayerDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32> -class ActivationLayer : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const ActivationLayerDataObject act_obj = *(DataSet().begin() + state.range(0)); - - // Set batched in source and destination shapes - const unsigned int batches = state.range(1); - const unsigned int fixed_point_position = 4; - TensorShape shape = act_obj.shape; - shape.set(shape.num_dimensions(), batches); - - // Create tensors - src = create_tensor<TensorType>(shape, dt, 1, fixed_point_position); - dst = create_tensor<TensorType>(shape, dt, 1, fixed_point_position); - - // Create and configure function - act_layer.configure(&src, &dst, act_obj.info); - - // Allocate tensors - src.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - } - - void TearDown(::benchmark::State &state) override - { - src.allocator()->free(); - dst.allocator()->free(); - - profiler.submit(state); - } - - Function act_layer{}; - Profiler profiler{}; - -private: - TensorType src{}; - TensorType dst{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ diff --git a/tests/benchmark/common/ConvolutionLayer.h b/tests/benchmark/common/ConvolutionLayer.h deleted file mode 100644 index a777a95bde..0000000000 --- 
a/tests/benchmark/common/ConvolutionLayer.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/ConvolutionLayerDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32> -class ConvolutionLayer : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const ConvolutionLayerDataObject conv_obj = *(DataSet().begin() + state.range(0)); - - // Set batched in source and destination shapes - const unsigned int batches = state.range(1); - const unsigned int fixed_point_position = 4; - TensorShape src_shape = conv_obj.src_shape; - TensorShape dst_shape = conv_obj.dst_shape; - src_shape.set(3 /* batch */, batches); - dst_shape.set(3 /* batch */, batches); - - // Create tensors - src = create_tensor<TensorType>(src_shape, dt, 1, fixed_point_position); - weights = create_tensor<TensorType>(conv_obj.weights_shape, dt, 1, fixed_point_position); - bias = create_tensor<TensorType>(conv_obj.bias_shape, dt, 1, fixed_point_position); - dst = create_tensor<TensorType>(dst_shape, dt, 1, fixed_point_position); - - // Create and configure function - conv_layer = std::unique_ptr<Function>(new Function()); - conv_layer->configure(&src, &weights, &bias, &dst, conv_obj.info); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - library->fill_tensor_uniform(Accessor(weights), 1); - library->fill_tensor_uniform(Accessor(bias), 2); - } - - void 
TearDown(::benchmark::State &state) override - { - conv_layer.reset(); - - src.allocator()->free(); - weights.allocator()->free(); - bias.allocator()->free(); - dst.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> conv_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType src{}; - TensorType weights{}; - TensorType bias{}; - TensorType dst{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ diff --git a/tests/benchmark/common/FullyConnectedLayer.h b/tests/benchmark/common/FullyConnectedLayer.h deleted file mode 100644 index eeef1de28a..0000000000 --- a/tests/benchmark/common/FullyConnectedLayer.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/ConvolutionLayerDataset.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32> -class FullyConnectedLayer : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const FullyConnectedLayerDataObject fc_obj = *(DataSet().begin() + state.range(0)); - - // Set batched in source and destination shapes - const unsigned int batches = state.range(1); - const unsigned int fixed_point_position = 4; - TensorShape src_shape = fc_obj.src_shape; - TensorShape dst_shape = fc_obj.dst_shape; - src_shape.set(src_shape.num_dimensions(), batches); - dst_shape.set(dst_shape.num_dimensions(), batches); - - // Create tensors - src = create_tensor<TensorType>(src_shape, dt, 1, fixed_point_position); - weights = create_tensor<TensorType>(fc_obj.weights_shape, dt, 1, fixed_point_position); - bias = create_tensor<TensorType>(fc_obj.bias_shape, dt, 1, fixed_point_position); - dst = create_tensor<TensorType>(dst_shape, dt, 1, fixed_point_position); - - // Create and configure function - fc_layer = std::unique_ptr<Function>(new Function()); - fc_layer->configure(&src, &weights, &bias, &dst); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - library->fill_tensor_uniform(Accessor(weights), 1); - library->fill_tensor_uniform(Accessor(bias), 2); - } - 
- void TearDown(::benchmark::State &state) override - { - fc_layer.reset(); - - src.allocator()->free(); - weights.allocator()->free(); - bias.allocator()->free(); - dst.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> fc_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType src{}; - TensorType weights{}; - TensorType bias{}; - TensorType dst{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ diff --git a/tests/benchmark/common/NormalizationLayer.h b/tests/benchmark/common/NormalizationLayer.h deleted file mode 100644 index 580dca6c48..0000000000 --- a/tests/benchmark/common/NormalizationLayer.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/NormalizationLayerDataset.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32> -class NormalizationLayer : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const NormalizationLayerDataObject norm_obj = *(DataSet().begin() + state.range(0)); - - // Set batched in source and destination shapes - const unsigned int batches = state.range(1); - const unsigned int fixed_point_position = 4; - TensorShape shape = norm_obj.shape; - shape.set(shape.num_dimensions(), batches); - - // Create tensors - src = create_tensor<TensorType>(shape, dt, 1, fixed_point_position); - dst = create_tensor<TensorType>(shape, dt, 1, fixed_point_position); - - // Create and configure function - norm_layer = std::unique_ptr<Function>(new Function()); - norm_layer->configure(&src, &dst, norm_obj.info); - - // Allocate tensors - src.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - } - - void TearDown(::benchmark::State &state) override - { - norm_layer.reset(); - - src.allocator()->free(); - dst.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> norm_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType src{}; - TensorType dst{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ diff --git 
a/tests/benchmark/common/PoolingLayer.h b/tests/benchmark/common/PoolingLayer.h deleted file mode 100644 index 96ff905568..0000000000 --- a/tests/benchmark/common/PoolingLayer.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/PoolingLayerDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32> -class PoolingLayer : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const PoolingLayerDataObject pool_obj = *(DataSet().begin() + state.range(0)); - - // Set batched in source and destination shapes - const unsigned int batches = state.range(1); - const unsigned int fixed_point_position = 4; - TensorShape src_shape = pool_obj.src_shape; - TensorShape dst_shape = pool_obj.dst_shape; - src_shape.set(src_shape.num_dimensions(), batches); - dst_shape.set(dst_shape.num_dimensions(), batches); - - // Create tensors - src = create_tensor<TensorType>(src_shape, dt, 1, fixed_point_position); - dst = create_tensor<TensorType>(dst_shape, dt, 1, fixed_point_position); - - // Create and configure function - pool_layer.configure(&src, &dst, pool_obj.info); - - // Allocate tensors - src.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - } - - void TearDown(::benchmark::State &state) override - { - // Free allocators - src.allocator()->free(); - dst.allocator()->free(); - - profiler.submit(state); - } - - Function pool_layer{}; - Profiler profiler{}; - -private: - TensorType src{}; - TensorType dst{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ diff --git a/tests/benchmark/main.cpp 
b/tests/benchmark/main.cpp deleted file mode 100644 index 356490960c..0000000000 --- a/tests/benchmark/main.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "PMUCounter.h" -#include "PerformanceProgramOptions.h" -#include "PerformanceUserConfiguration.h" -#include "TensorLibrary.h" -#include "Utils.h" -#include "WallClockTimer.h" - -#include "benchmark/benchmark_api.h" -#include "support/ToolchainSupport.h" - -#ifdef OPENCL -#include "arm_compute/runtime/CL/CLScheduler.h" -#endif /* OPENCL */ -#include "arm_compute/runtime/Scheduler.h" - -#include <iostream> -#include <memory> - -using namespace arm_compute::test; -using namespace arm_compute::test::performance; - -namespace arm_compute -{ -namespace test -{ -PerformanceUserConfiguration user_config; -std::unique_ptr<TensorLibrary> library; -} // namespace test -} // namespace arm_compute - -int main(int argc, char **argv) -{ - PerformanceProgramOptions options; - try - { - options.parse_commandline(argc, argv); - - if(options.wants_help()) - { - std::cout << "Usage: " << argv[0] << " [options] PATH\n"; - std::cout << options.get_help() << "\n"; - } - - user_config = PerformanceUserConfiguration(options); - } - catch(const boost::program_options::required_option &err) - { - std::cerr << "Error: " << err.what() << "\n"; - std::cout << "\nUsage: " << argv[0] << " [options] PATH\n"; - std::cout << options.get_help() << "\n"; - return 1; - } - - ::benchmark::Initialize(&argc, argv); - - if(user_config.seed.is_set()) - { - library = arm_compute::support::cpp14::make_unique<TensorLibrary>(user_config.path.get(), user_config.seed); - } - else - { - library = arm_compute::support::cpp14::make_unique<TensorLibrary>(user_config.path.get()); - } - -#ifdef OPENCL - arm_compute::CLScheduler::get().default_init(); -#endif /* OPENCL */ - - std::cout << "Using " << user_config.threads << " CPU " << (user_config.threads == 1 ? 
"thread" : "threads") << "\n"; - std::cout << "Seed: " << library->seed() << "\n"; - arm_compute::Scheduler::get().set_num_threads(user_config.threads); - - ::benchmark::RunSpecifiedBenchmarks(); -} diff --git a/tests/benchmark/system_tests/CL/AlexNet.cpp b/tests/benchmark/system_tests/CL/AlexNet.cpp deleted file mode 100644 index 04b65b8c39..0000000000 --- a/tests/benchmark/system_tests/CL/AlexNet.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLSubTensor.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" -#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" -#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/system_tests/common/AlexNet.h" - -namespace -{ -using AlexNetSystemTest = AlexNetFixture<ICLTensor, - CLTensor, - CLSubTensor, - CLAccessor, - CLActivationLayer, - CLConvolutionLayer, - CLFullyConnectedLayer, - CLNormalizationLayer, - CLPoolingLayer, - CLSoftmaxLayer>; -} // namespace - -BENCHMARK_DEFINE_F(AlexNetSystemTest, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run AlexNet - profiler.start(); - network.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(AlexNetSystemTest, cl_alexnet) -->Threads(1) -->Iterations(10) -->ArgName("batch_size") -->Arg(1) -->Arg(4) -->Arg(8);
\ No newline at end of file diff --git a/tests/benchmark/system_tests/CL/LeNet5.cpp b/tests/benchmark/system_tests/CL/LeNet5.cpp deleted file mode 100644 index 60bb37a35b..0000000000 --- a/tests/benchmark/system_tests/CL/LeNet5.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" -#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/system_tests/common/LeNet5.h" - -namespace -{ -using LeNet5SystemTest = LeNet5Fixture<CLTensor, - CLAccessor, - CLActivationLayer, - CLConvolutionLayer, - CLFullyConnectedLayer, - CLPoolingLayer, - CLSoftmaxLayer>; -} // namespace - -BENCHMARK_DEFINE_F(LeNet5SystemTest, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run LeNet5 - profiler.start(); - network.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(LeNet5SystemTest, cl_lenet5) -->Threads(1) -->Iterations(10) -->ArgName("batch_size") -->Arg(1) -->Arg(16) -->Arg(32); diff --git a/tests/benchmark/system_tests/NEON/AlexNet.cpp b/tests/benchmark/system_tests/NEON/AlexNet.cpp deleted file mode 100644 index 530c400ae3..0000000000 --- a/tests/benchmark/system_tests/NEON/AlexNet.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" -#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" -#include "arm_compute/runtime/SubTensor.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/system_tests/common/AlexNet.h" - -namespace -{ -using AlexNetSystemTestF32 = AlexNetFixture<ITensor, - Tensor, - SubTensor, - NEAccessor, - NEActivationLayer, - NEConvolutionLayer, - NEFullyConnectedLayer, - NENormalizationLayer, - NEPoolingLayer, - NESoftmaxLayer, - DataType::F32>; - -using AlexNetSystemTestQS8 = AlexNetFixture<ITensor, - Tensor, - SubTensor, - NEAccessor, - NEActivationLayer, - NEConvolutionLayer, - NEFullyConnectedLayer, - NENormalizationLayer, - NEPoolingLayer, - NESoftmaxLayer, - DataType::QS8>; -} // namespace - -// F32 -BENCHMARK_DEFINE_F(AlexNetSystemTestF32, neon_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run AlexNet - profiler.start(); - network.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(AlexNetSystemTestF32, neon_alexnet) -->Threads(1) -->Iterations(10) -->ArgName("batch_size") -->Arg(1) -->Arg(4) -->Arg(8); - -// QS8 -BENCHMARK_DEFINE_F(AlexNetSystemTestQS8, neon_alexnet) 
-(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run AlexNet - profiler.start(); - network.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(AlexNetSystemTestQS8, neon_alexnet) -->Threads(1) -->Iterations(10) -->ArgName("batch_size") -->Arg(1) -->Arg(4) -->Arg(8);
\ No newline at end of file diff --git a/tests/benchmark/system_tests/NEON/LeNet5.cpp b/tests/benchmark/system_tests/NEON/LeNet5.cpp deleted file mode 100644 index 8724d8ba3a..0000000000 --- a/tests/benchmark/system_tests/NEON/LeNet5.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "Globals.h" -#include "NEON/NEAccessor.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" -#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::neon; - -#include "benchmark/system_tests/common/LeNet5.h" - -namespace -{ -using LeNet5SystemTest = LeNet5Fixture<Tensor, - NEAccessor, - NEActivationLayer, - NEConvolutionLayer, - NEFullyConnectedLayer, - NEPoolingLayer, - NESoftmaxLayer>; -} // namespace - -BENCHMARK_DEFINE_F(LeNet5SystemTest, neon_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run LeNet5 - profiler.start(); - network.run(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(LeNet5SystemTest, neon_lenet5) -->Threads(1) -->Iterations(10) -->ArgName("batch_size") -->Arg(1) -->Arg(16) -->Arg(32); diff --git a/tests/benchmark/system_tests/common/AlexNet.h b/tests/benchmark/system_tests/common/AlexNet.h deleted file mode 100644 index 46e26bf723..0000000000 --- a/tests/benchmark/system_tests/common/AlexNet.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ - -#include "tests/TensorLibrary.h" -#include "tests/Utils.h" - -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "model_objects/AlexNet.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename ITensorType, - typename TensorType, - typename SubTensorType, - typename Accessor, - typename ActivationLayerFunction, - typename ConvolutionLayerFunction, - typename FullyConnectedLayerFunction, - typename NormalizationLayerFunction, - typename PoolingLayerFunction, - typename SoftmaxLayerFunction, - DataType dt = DataType::F32> -class AlexNetFixture : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - const unsigned int batches = static_cast<unsigned int>(state.range(0)); - const bool weights_transposed = true; - - network.init_weights(batches, weights_transposed); - network.build(); - network.allocate(); - network.fill_random(); - } - - void TearDown(::benchmark::State &state) override - { - profiler.submit(state); - network.clear(); - } - - Profiler profiler{}; - model_objects::AlexNet<ITensorType, - TensorType, - SubTensorType, - Accessor, - ActivationLayerFunction, - ConvolutionLayerFunction, - FullyConnectedLayerFunction, - NormalizationLayerFunction, - PoolingLayerFunction, - SoftmaxLayerFunction, - dt> - network{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ diff --git a/tests/benchmark/system_tests/common/LeNet5.h b/tests/benchmark/system_tests/common/LeNet5.h deleted file mode 100644 index a2d0cd577c..0000000000 --- a/tests/benchmark/system_tests/common/LeNet5.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - 
* Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ - -#include "tests/TensorLibrary.h" -#include "tests/Utils.h" - -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "model_objects/LeNet5.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -template <typename TensorType, - typename Accessor, - typename ActivationLayerFunction, - typename ConvolutionLayerFunction, - typename FullyConnectedLayerFunction, - typename PoolingLayerFunction, - typename SoftmaxLayerFunction> -class LeNet5Fixture : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - profiler.add(std::make_shared<WallClockTimer>()); - - network.build(static_cast<unsigned int>(state.range(0))); - network.fill_random(); - } - - void TearDown(::benchmark::State &state) override - { - profiler.submit(state); - network.clear(); - } - - Profiler profiler{}; - model_objects::LeNet5<TensorType, - Accessor, - ActivationLayerFunction, - ConvolutionLayerFunction, - FullyConnectedLayerFunction, - PoolingLayerFunction, - SoftmaxLayerFunction> - network{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ */ |