aboutsummaryrefslogtreecommitdiff
path: root/tests/benchmark/CL
diff options
context:
space:
mode:
author Moritz Pflanzer <moritz.pflanzer@arm.com> 2017-07-05 10:52:21 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:16:42 +0100
commit ee493ae23b8cd6de5a6c578cea34bccb478d2f64 (patch)
tree 154d1f8652f659128d3d76a1ac49cc942816b090 /tests/benchmark/CL
parent d7a5d22dd6b2a968469ea511f11907b131ec1c67 (diff)
download ComputeLibrary-ee493ae23b8cd6de5a6c578cea34bccb478d2f64.tar.gz
COMPMID-415: Port benchmark tests and remove google benchmark
Change-Id: I2f17720a4e974b2cc4481f2884d9f351e8f78b5f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79776
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'tests/benchmark/CL')
-rw-r--r-- tests/benchmark/CL/ActivationLayer.cpp 211
-rw-r--r-- tests/benchmark/CL/BitwiseAnd.cpp 133
-rw-r--r-- tests/benchmark/CL/CMakeLists.txt 57
-rw-r--r-- tests/benchmark/CL/ConvolutionLayer.cpp 276
-rw-r--r-- tests/benchmark/CL/FullyConnectedLayer.cpp 115
-rw-r--r-- tests/benchmark/CL/GEMM.cpp 539
-rw-r--r-- tests/benchmark/CL/GEMM.h 102
-rw-r--r-- tests/benchmark/CL/NormalizationLayer.cpp 92
-rw-r--r-- tests/benchmark/CL/PoolingLayer.cpp 140
9 files changed, 0 insertions, 1665 deletions
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp
deleted file mode 100644
index 52a357b2a6..0000000000
--- a/tests/benchmark/CL/ActivationLayer.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/common/ActivationLayer.h"
-
-namespace
-{
-using ActivationLayerAlexNet = ActivationLayer<AlexNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>;
-using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>;
-using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>;
-} // namespace
-
-BENCHMARK_DEFINE_F(ActivationLayerAlexNet, cl_alexnet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- act_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ActivationLayerLeNet5, cl_lenet5)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- act_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- act_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>);
diff --git a/tests/benchmark/CL/BitwiseAnd.cpp b/tests/benchmark/CL/BitwiseAnd.cpp
deleted file mode 100644
index 4858c73948..0000000000
--- a/tests/benchmark/CL/BitwiseAnd.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
-
-#include "benchmark/benchmark_api.h"
-
-#include <memory>
-#include <string>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-namespace
-{
-template <typename DataSet>
-class BitwiseAnd : public ::benchmark::Fixture
-{
-public:
- void SetUp(::benchmark::State &state) override
- {
- ::benchmark::Fixture::SetUp(state);
-
- profiler.add(std::make_shared<WallClockTimer>());
-
- const std::string image_name = *(DataSet().begin() + state.range(0));
- const RawTensor &raw = library->get(image_name);
-
- // Create tensors
- src1 = create_tensor<CLTensor>(raw.shape(), DataType::U8);
- src2 = create_tensor<CLTensor>(raw.shape(), DataType::U8);
- dst = create_tensor<CLTensor>(raw.shape(), DataType::U8);
-
- // Create and configure function
- band.configure(&src1, &src2, &dst);
-
- // Allocate tensors
- src1.allocator()->allocate();
- src2.allocator()->allocate();
- dst.allocator()->allocate();
-
- // Fill source tensors
- library->fill(CLAccessor(src1), image_name, Channel::R);
- library->fill(CLAccessor(src2), image_name, Channel::G);
- }
-
- void TearDown(::benchmark::State &state) override
- {
- profiler.submit(state);
-
- ::benchmark::Fixture::TearDown(state);
- }
-
- CLBitwiseAnd band{};
- Profiler profiler{};
-
-private:
- CLTensor src1{};
- CLTensor src2{};
- CLTensor dst{};
-};
-
-using BitwiseAndSmall = BitwiseAnd<SmallImages>;
-using BitwiseAndLarge = BitwiseAnd<LargeImages>;
-} // namespace
-
-BENCHMARK_DEFINE_F(BitwiseAndSmall, cl_bitwise_and)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- band.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(BitwiseAndSmall, cl_bitwise_and)
-->Threads(1)
-->Apply(DataSetArgs<SmallImages>);
-
-BENCHMARK_DEFINE_F(BitwiseAndLarge, cl_bitwise_and)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- band.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(BitwiseAndLarge, cl_bitwise_and)
-->Threads(1)
-->Apply(DataSetArgs<LargeImages>);
diff --git a/tests/benchmark/CL/CMakeLists.txt b/tests/benchmark/CL/CMakeLists.txt
deleted file mode 100644
index 8493309f40..0000000000
--- a/tests/benchmark/CL/CMakeLists.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (c) 2017 ARM Limited.
-#
-# SPDX-License-Identifier: MIT
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-cmake_minimum_required (VERSION 3.1)
-
-include_directories(${CMAKE_SOURCE_DIR}/../include)
-
-set(arm_compute_test_benchmark_TARGET_DEFINITIONS
- ${arm_compute_test_benchmark_TARGET_DEFINITIONS}
- -DOPENCL
- PARENT_SCOPE
-)
-
-set(arm_compute_test_benchmark_TARGET_INCLUDES
- ${arm_compute_test_benchmark_TARGET_INCLUDES}
- ${CMAKE_SOURCE_DIR}/../include
- PARENT_SCOPE
-)
-
-set(arm_compute_test_benchmark_OPENCL_SOURCE_FILES
- ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h
- ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp
-)
-
-add_library(arm_compute_test_benchmark_OPENCL OBJECT
- ${arm_compute_test_benchmark_OPENCL_SOURCE_FILES}
-)
-
-set(arm_compute_test_benchmark_TARGET_OBJECTS
- ${arm_compute_test_benchmark_TARGET_OBJECTS}
- $<TARGET_OBJECTS:arm_compute_test_benchmark_OPENCL>
- PARENT_SCOPE
-)
-
-set(arm_compute_test_benchmark_TARGET_LIBRARIES
- ${arm_compute_test_benchmark_TARGET_LIBRARIES}
- OpenCL
- PARENT_SCOPE
-)
diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp
deleted file mode 100644
index e790273f9c..0000000000
--- a/tests/benchmark/CL/ConvolutionLayer.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/common/ConvolutionLayer.h"
-
-namespace
-{
-using ConvolutionLayerAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>;
-using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>;
-using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, CLTensor, CLAccessor, CLConvolutionLayer>;
-using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, CLTensor, CLAccessor, CLConvolutionLayer>;
-} // namespace
-
-BENCHMARK_DEFINE_F(ConvolutionLayerAlexNet, cl_alexnet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- conv_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, cl_lenet5)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- conv_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- conv_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- conv_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>);
diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp
deleted file mode 100644
index fb8e1bc09f..0000000000
--- a/tests/benchmark/CL/FullyConnectedLayer.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-
-#include "benchmark/benchmark_api.h"
-
-#include <memory>
-#include <string>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/common/FullyConnectedLayer.h"
-
-namespace
-{
-using FullyConnectedLayerAlexNet = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>;
-using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>;
-using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>;
-} // namespace
-
-BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNet, cl_alexnet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- fc_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, cl_lenet5)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- fc_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- fc_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>);
diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp
deleted file mode 100644
index 87dad05f30..0000000000
--- a/tests/benchmark/CL/GEMM.cpp
+++ /dev/null
@@ -1,539 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/CL/GEMM.h"
-
-namespace
-{
-using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
-using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
-using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
-using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
-using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
-using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
-} // namespace
-
-BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
-
-BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
-
-BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
-
-BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- gemm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
diff --git a/tests/benchmark/CL/GEMM.h b/tests/benchmark/CL/GEMM.h
deleted file mode 100644
index ca3d9ad594..0000000000
--- a/tests/benchmark/CL/GEMM.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
-#define __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
-
-#include "TensorLibrary.h"
-#include "Utils.h"
-#include "dataset/GEMMDataset.h"
-
-#include <memory>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-
-namespace arm_compute
-{
-namespace test
-{
-namespace benchmark
-{
-// FIXME: Merge with NEON/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM
-template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type>
-class GEMM : public ::benchmark::Fixture
-{
-public:
- void SetUp(::benchmark::State &state) override
- {
- ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32, "Unsupported data type for GEMM operation");
-
- profiler.add(std::make_shared<WallClockTimer>());
-
- const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0));
-
- TensorShape shape_a = gemm_obj.shape_a;
- TensorShape shape_b = gemm_obj.shape_b;
- TensorShape shape_c = gemm_obj.shape_c;
- TensorShape shape_d = gemm_obj.shape_d;
-
- // Create tensors
- a = create_tensor<CLTensor>(shape_a, data_type);
- b = create_tensor<CLTensor>(shape_b, data_type);
- c = create_tensor<CLTensor>(shape_c, data_type);
- d = create_tensor<CLTensor>(shape_d, data_type);
-
- // Create and configure function
- gemm_layer = std::unique_ptr<Function>(new Function());
- gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta);
-
- // Allocate tensors
- a.allocator()->allocate();
- b.allocator()->allocate();
- c.allocator()->allocate();
- d.allocator()->allocate();
- }
-
- void TearDown(::benchmark::State &state) override
- {
- gemm_layer.reset();
-
- a.allocator()->free();
- b.allocator()->free();
- c.allocator()->free();
- d.allocator()->free();
-
- profiler.submit(state);
- }
-
- std::unique_ptr<Function> gemm_layer{ nullptr };
- Profiler profiler{};
-
-private:
- TensorType a{};
- TensorType b{};
- TensorType c{};
- TensorType d{};
-};
-} // namespace benchmark
-} // namespace test
-} // namespace arm_compute
-#endif //__ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp
deleted file mode 100644
index 28f89dce1f..0000000000
--- a/tests/benchmark/CL/NormalizationLayer.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/common/NormalizationLayer.h"
-
-namespace
-{
-using NormalizationLayerAlexNet = NormalizationLayer<AlexNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>;
-using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>;
-
-} // namespace
-
-BENCHMARK_DEFINE_F(NormalizationLayerAlexNet, cl_alexnet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- norm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- norm_layer->run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>);
diff --git a/tests/benchmark/CL/PoolingLayer.cpp b/tests/benchmark/CL/PoolingLayer.cpp
deleted file mode 100644
index 05764a412a..0000000000
--- a/tests/benchmark/CL/PoolingLayer.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "CL/CLAccessor.h"
-#include "Globals.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::cl;
-
-#include "benchmark/common/PoolingLayer.h"
-
-namespace
-{
-using PoolingLayerAlexNet = PoolingLayer<AlexNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>;
-using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>;
-using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>;
-} // namespace
-
-BENCHMARK_DEFINE_F(PoolingLayerAlexNet, cl_alexnet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- pool_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(PoolingLayerLeNet5, cl_lenet5)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- pool_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, cl_googlenet)
-(::benchmark::State &state)
-{
- while(state.KeepRunning())
- {
- // Run function
- profiler.start();
- pool_layer.run();
- CLScheduler::get().sync();
- profiler.stop();
- }
-}
-
-// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 4, 8>);