diff options
Diffstat (limited to 'tests/benchmark/CL')
-rw-r--r-- | tests/benchmark/CL/ActivationLayer.cpp | 211 | ||||
-rw-r--r-- | tests/benchmark/CL/BitwiseAnd.cpp | 133 | ||||
-rw-r--r-- | tests/benchmark/CL/CMakeLists.txt | 57 | ||||
-rw-r--r-- | tests/benchmark/CL/ConvolutionLayer.cpp | 276 | ||||
-rw-r--r-- | tests/benchmark/CL/FullyConnectedLayer.cpp | 115 | ||||
-rw-r--r-- | tests/benchmark/CL/GEMM.cpp | 539 | ||||
-rw-r--r-- | tests/benchmark/CL/GEMM.h | 102 | ||||
-rw-r--r-- | tests/benchmark/CL/NormalizationLayer.cpp | 92 | ||||
-rw-r--r-- | tests/benchmark/CL/PoolingLayer.cpp | 140 |
9 files changed, 0 insertions, 1665 deletions
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp deleted file mode 100644 index 52a357b2a6..0000000000 --- a/tests/benchmark/CL/ActivationLayer.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/ActivationLayer.h" - -namespace -{ -using ActivationLayerAlexNet = ActivationLayer<AlexNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; -} // namespace - -BENCHMARK_DEFINE_F(ActivationLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - act_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>); diff --git a/tests/benchmark/CL/BitwiseAnd.cpp b/tests/benchmark/CL/BitwiseAnd.cpp deleted file mode 100644 index 4858c73948..0000000000 --- a/tests/benchmark/CL/BitwiseAnd.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" - -#include "benchmark/benchmark_api.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -namespace -{ -template <typename DataSet> -class BitwiseAnd : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - ::benchmark::Fixture::SetUp(state); - - profiler.add(std::make_shared<WallClockTimer>()); - - const std::string image_name = *(DataSet().begin() + state.range(0)); - const RawTensor &raw = library->get(image_name); - - // Create tensors - src1 = create_tensor<CLTensor>(raw.shape(), DataType::U8); - src2 = create_tensor<CLTensor>(raw.shape(), DataType::U8); - dst = create_tensor<CLTensor>(raw.shape(), DataType::U8); - - // Create and configure function - band.configure(&src1, &src2, &dst); - - // Allocate tensors - src1.allocator()->allocate(); - src2.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill source tensors - library->fill(CLAccessor(src1), image_name, Channel::R); - library->fill(CLAccessor(src2), image_name, Channel::G); - } - - void TearDown(::benchmark::State &state) override - { - profiler.submit(state); - - ::benchmark::Fixture::TearDown(state); - } - - CLBitwiseAnd band{}; - Profiler profiler{}; - -private: - CLTensor src1{}; - CLTensor src2{}; - CLTensor dst{}; -}; - -using BitwiseAndSmall = BitwiseAnd<SmallImages>; -using BitwiseAndLarge = BitwiseAnd<LargeImages>; -} // namespace - -BENCHMARK_DEFINE_F(BitwiseAndSmall, cl_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndSmall, cl_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<SmallImages>); - -BENCHMARK_DEFINE_F(BitwiseAndLarge, cl_bitwise_and) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - band.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(BitwiseAndLarge, cl_bitwise_and) -->Threads(1) -->Apply(DataSetArgs<LargeImages>); diff --git a/tests/benchmark/CL/CMakeLists.txt b/tests/benchmark/CL/CMakeLists.txt deleted file mode 100644 index 8493309f40..0000000000 --- a/tests/benchmark/CL/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2017 ARM Limited. -# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -cmake_minimum_required (VERSION 3.1) - -include_directories(${CMAKE_SOURCE_DIR}/../include) - -set(arm_compute_test_benchmark_TARGET_DEFINITIONS - ${arm_compute_test_benchmark_TARGET_DEFINITIONS} - -DOPENCL - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_TARGET_INCLUDES - ${arm_compute_test_benchmark_TARGET_INCLUDES} - ${CMAKE_SOURCE_DIR}/../include - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_OPENCL_SOURCE_FILES - ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h - ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp -) - -add_library(arm_compute_test_benchmark_OPENCL OBJECT - ${arm_compute_test_benchmark_OPENCL_SOURCE_FILES} -) - -set(arm_compute_test_benchmark_TARGET_OBJECTS - ${arm_compute_test_benchmark_TARGET_OBJECTS} - $<TARGET_OBJECTS:arm_compute_test_benchmark_OPENCL> - PARENT_SCOPE -) - -set(arm_compute_test_benchmark_TARGET_LIBRARIES - ${arm_compute_test_benchmark_TARGET_LIBRARIES} - OpenCL - PARENT_SCOPE -) diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp deleted file mode 100644 index e790273f9c..0000000000 --- a/tests/benchmark/CL/ConvolutionLayer.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/ConvolutionLayer.h" - -namespace -{ -using ConvolutionLayerAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, CLTensor, CLAccessor, CLConvolutionLayer>; -using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, CLTensor, CLAccessor, CLConvolutionLayer>; -} // namespace - -BENCHMARK_DEFINE_F(ConvolutionLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - conv_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>); -BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>); diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp deleted file mode 100644 index fb8e1bc09f..0000000000 --- a/tests/benchmark/CL/FullyConnectedLayer.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" - -#include "benchmark/benchmark_api.h" - -#include <memory> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/FullyConnectedLayer.h" - -namespace -{ -using FullyConnectedLayerAlexNet = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; -} // namespace - -BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - fc_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>); diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp deleted file mode 100644 index 87dad05f30..0000000000 --- a/tests/benchmark/CL/GEMM.cpp +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/CL/GEMM.h" - -namespace -{ -using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>; -using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>; -} // namespace - -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); -BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) -->Threads(1) -->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); - -BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); - -BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - gemm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 0>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 1>); -BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) -->Threads(1) -->Apply(DataSetArg<MatrixMultiplyDataset, 2>); diff --git a/tests/benchmark/CL/GEMM.h b/tests/benchmark/CL/GEMM.h deleted file mode 100644 index ca3d9ad594..0000000000 --- a/tests/benchmark/CL/GEMM.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ -#define __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ - -#include "TensorLibrary.h" -#include "Utils.h" -#include "dataset/GEMMDataset.h" - -#include <memory> - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; - -namespace arm_compute -{ -namespace test -{ -namespace benchmark -{ -// FIXME: Merge with NEON/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM -template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type> -class GEMM : public ::benchmark::Fixture -{ -public: - void SetUp(::benchmark::State &state) override - { - ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32, "Unsupported data type for GEMM operation"); - - profiler.add(std::make_shared<WallClockTimer>()); - - const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); - - TensorShape shape_a = gemm_obj.shape_a; - TensorShape shape_b = gemm_obj.shape_b; - TensorShape shape_c = gemm_obj.shape_c; - TensorShape shape_d = gemm_obj.shape_d; - - // Create tensors - a = create_tensor<CLTensor>(shape_a, data_type); - b = create_tensor<CLTensor>(shape_b, data_type); - c = create_tensor<CLTensor>(shape_c, data_type); - d = create_tensor<CLTensor>(shape_d, data_type); - - // Create and configure function - gemm_layer = std::unique_ptr<Function>(new Function()); - gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); - - // Allocate tensors - a.allocator()->allocate(); - b.allocator()->allocate(); - c.allocator()->allocate(); - d.allocator()->allocate(); - } - - void TearDown(::benchmark::State &state) override - { - gemm_layer.reset(); - - a.allocator()->free(); - b.allocator()->free(); - c.allocator()->free(); - d.allocator()->free(); - - profiler.submit(state); - } - - std::unique_ptr<Function> gemm_layer{ nullptr }; - Profiler profiler{}; - -private: - TensorType a{}; - TensorType b{}; - TensorType c{}; - TensorType d{}; -}; -} // namespace benchmark -} // namespace test -} // namespace arm_compute -#endif //__ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp deleted file mode 100644 index 28f89dce1f..0000000000 --- a/tests/benchmark/CL/NormalizationLayer.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/NormalizationLayer.h" - -namespace -{ -using NormalizationLayerAlexNet = NormalizationLayer<AlexNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; -using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; - -} // namespace - -BENCHMARK_DEFINE_F(NormalizationLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - norm_layer->run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>); diff --git a/tests/benchmark/CL/PoolingLayer.cpp b/tests/benchmark/CL/PoolingLayer.cpp deleted file mode 100644 index 05764a412a..0000000000 --- a/tests/benchmark/CL/PoolingLayer.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CL/CLAccessor.h" -#include "Globals.h" -#include "TensorLibrary.h" -#include "benchmark/Datasets.h" -#include "benchmark/Profiler.h" -#include "benchmark/WallClockTimer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" - -#include "benchmark/benchmark_api.h" - -using namespace arm_compute; -using namespace arm_compute::test; -using namespace arm_compute::test::benchmark; -using namespace arm_compute::test::cl; - -#include "benchmark/common/PoolingLayer.h" - -namespace -{ -using PoolingLayerAlexNet = PoolingLayer<AlexNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; -} // namespace - -BENCHMARK_DEFINE_F(PoolingLayerAlexNet, cl_alexnet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) -->Threads(1) -->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerLeNet5, cl_lenet5) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) -->Threads(1) -->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>); - -BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, cl_googlenet) -(::benchmark::State &state) -{ - while(state.KeepRunning()) - { - // Run function - profiler.start(); - pool_layer.run(); - CLScheduler::get().sync(); - profiler.stop(); - } -} - -// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>); -BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) -->Threads(1) -->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 4, 8>); |