10 files changed, 0 insertions, 2061 deletions
diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp
deleted file mode 100644
index 9ea675e515..0000000000
--- a/tests/benchmark/NEON/ActivationLayer.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/ActivationLayer.h"
-
-namespace
-{
-using ActivationLayerAlexNetF32 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer>;
-using ActivationLayerAlexNetQS8 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::QS8>;
-using ActivationLayerLeNet5     = ActivationLayer<LeNet5ActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>;
-using ActivationLayerGoogLeNet  = ActivationLayer<GoogLeNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>;
-} // namespace
-
-// F32
-BENCHMARK_DEFINE_F(ActivationLayerAlexNetF32, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        act_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>);
-
-// QS8
-BENCHMARK_DEFINE_F(ActivationLayerAlexNetQS8, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        act_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ActivationLayerLeNet5, neon_lenet5)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        act_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        act_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>);
diff --git a/tests/benchmark/NEON/BitwiseAnd.cpp b/tests/benchmark/NEON/BitwiseAnd.cpp
deleted file mode 100644
index be68fd39e4..0000000000
--- a/tests/benchmark/NEON/BitwiseAnd.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-#include <memory>
-#include <string>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-namespace
-{
-template <typename DataSet>
-class BitwiseAnd : public ::benchmark::Fixture
-{
-public:
-    void SetUp(::benchmark::State &state) override
-    {
-        profiler.add(std::make_shared<WallClockTimer>());
-
-        const std::string image_name = *(DataSet().begin() + state.range(0));
-        const RawTensor &raw        = library->get(image_name);
-
-        // Create tensors
-        src1 = create_tensor<Tensor>(raw.shape(), DataType::U8);
-        src2 = create_tensor<Tensor>(raw.shape(), DataType::U8);
-        dst  = create_tensor<Tensor>(raw.shape(), DataType::U8);
-
-        // Create and configure function
-        band.configure(&src1, &src2, &dst);
-
-        // Allocate tensors
-        src1.allocator()->allocate();
-        src2.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        // Fill source tensors
-        library->fill(NEAccessor(src1), image_name, Channel::R);
-        library->fill(NEAccessor(src2), image_name, Channel::G);
-    }
-
-    void TearDown(::benchmark::State &state) override
-    {
-        profiler.submit(state);
-    }
-
-    NEBitwiseAnd band{};
-    Profiler     profiler{};
-
-private:
-    Tensor src1{};
-    Tensor src2{};
-    Tensor dst{};
-};
-
-using BitwiseAndSmall = BitwiseAnd<SmallImages>;
-using BitwiseAndLarge = BitwiseAnd<LargeImages>;
-} // namespace
-
-BENCHMARK_DEFINE_F(BitwiseAndSmall, neon_bitwise_and)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        band.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(BitwiseAndSmall, neon_bitwise_and)
-->Threads(1)
-->Apply(DataSetArgs<SmallImages>);
-
-BENCHMARK_DEFINE_F(BitwiseAndLarge, neon_bitwise_and)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        band.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(BitwiseAndLarge, neon_bitwise_and)
-->Threads(1)
-->Apply(DataSetArgs<LargeImages>);
diff --git a/tests/benchmark/NEON/CMakeLists.txt b/tests/benchmark/NEON/CMakeLists.txt
deleted file mode 100644
index 2cb3eb36c9..0000000000
--- a/tests/benchmark/NEON/CMakeLists.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2017 ARM Limited.
-#
-# SPDX-License-Identifier: MIT
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-cmake_minimum_required (VERSION 3.1)
-
-set(arm_compute_test_benchmark_NEON_SOURCE_FILES
-    ${CMAKE_SOURCE_DIR}/NEON/NEAccessor.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp
-)
-
-add_library(arm_compute_test_benchmark_NEON OBJECT
-    ${arm_compute_test_benchmark_NEON_SOURCE_FILES}
-)
-
-SET(arm_compute_test_benchmark_TARGET_OBJECTS
-    ${arm_compute_test_benchmark_TARGET_OBJECTS}
-    $<TARGET_OBJECTS:arm_compute_test_benchmark_NEON>
-    PARENT_SCOPE
-)
diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp
deleted file mode 100644
index a0b1236177..0000000000
--- a/tests/benchmark/NEON/ConvolutionLayer.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/ConvolutionLayer.h"
-
-namespace
-{
-using ConvolutionLayerAlexNetF32 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
-using ConvolutionLayerAlexNetQS8 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer, DataType::QS8>;
-using ConvolutionLayerLeNet5     = ConvolutionLayer<LeNet5ConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
-using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, Tensor, NEAccessor, NEConvolutionLayer>;
-using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, Tensor, NEAccessor, NEConvolutionLayer>;
-} // namespace
-
-// F32
-BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
-
-// QS8
-BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>);
diff --git a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp
deleted file mode 100644
index bbe5707e41..0000000000
--- a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-#include "dataset/ConvolutionLayerDataset.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/ConvolutionLayer.h"
-
-namespace
-{
-using ConvolutionLayerDirectAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEDirectConvolutionLayer>;
-} // namespace
-
-BENCHMARK_DEFINE_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        conv_layer->run();
-        profiler.stop();
-    }
-}
-
-// Registr only the 3x3 convolution layers
-BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp
deleted file mode 100644
index 15c2a407e6..0000000000
--- a/tests/benchmark/NEON/FullyConnectedLayer.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/FullyConnectedLayer.h"
-
-namespace
-{
-using FullyConnectedLayerAlexNetF32 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>;
-using FullyConnectedLayerAlexNetQS8 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer, DataType::QS8>;
-using FullyConnectedLayerLeNet5     = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>;
-using FullyConnectedLayerGoogLeNet  = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>;
-} // namespace
-
-// F32
-BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetF32, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        fc_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>);
-
-// QS8
-BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetQS8, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        fc_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, neon_lenet5)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        fc_layer->run();
-        profiler.stop();
-    }
-}
-BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        fc_layer->run();
-        profiler.stop();
-    }
-}
-BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>);
diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp
deleted file mode 100644
index 1655c32e2b..0000000000
--- a/tests/benchmark/NEON/GEMM.cpp
+++ /dev/null
@@ -1,777 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/NEON/GEMM.h"
-
-namespace
-{
-#ifdef ENABLE_FP16
-using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F16>;
-using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F16>;
-using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F16>;
-#endif /* ENABLE_FP16 */
-using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F32>;
-using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F32>;
-using GEMMQS8GoogLeNet1  = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
-using GEMMQS8GoogLeNet2  = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
-using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F32>;
-using QS8MatrixMultiply  = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
-} // namespace
-#ifdef ENABLE_FP16
-BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
-BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
-
-BENCHMARK_DEFINE_F(FP16MatrixMultiply, neon_matrix_multiply)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
-BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
-#endif /* ENABLE_FP16 */
-
-BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
-BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
-
-BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet1, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet2, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
-BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
-
-BENCHMARK_DEFINE_F(FP32MatrixMultiply, neon_matrix_multiply)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
-BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
-
-BENCHMARK_DEFINE_F(QS8MatrixMultiply, neon_matrix_multiply)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        gemm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
-BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
-BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
-->Threads(1)
-->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
diff --git a/tests/benchmark/NEON/GEMM.h b/tests/benchmark/NEON/GEMM.h
deleted file mode 100644
index 4f0557d610..0000000000
--- a/tests/benchmark/NEON/GEMM.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__
-#define __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__
-
-#include "TensorLibrary.h"
-#include "Utils.h"
-#include "dataset/GEMMDataset.h"
-
-#include <memory>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-
-namespace arm_compute
-{
-namespace test
-{
-namespace benchmark
-{
-// FIXME: Merge with CL/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM
-template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type>
-class GEMM : public ::benchmark::Fixture
-{
-public:
-    void SetUp(::benchmark::State &state) override
-    {
-#ifdef ENABLE_FP16
-        ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation");
-#else  /* ENABLE_FP16 */
-        ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation");
-#endif /* ENABLE_FP16 */
-
-        profiler.add(std::make_shared<WallClockTimer>());
-
-        const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0));
-
-        TensorShape shape_a = gemm_obj.shape_a;
-        TensorShape shape_b = gemm_obj.shape_b;
-        TensorShape shape_c = gemm_obj.shape_c;
-        TensorShape shape_d = gemm_obj.shape_d;
-
-        // Create tensors
-        a = create_tensor<Tensor>(shape_a, data_type, 1, 4);
-        b = create_tensor<Tensor>(shape_b, data_type, 1, 4);
-        c = create_tensor<Tensor>(shape_c, data_type, 1, 4);
-        d = create_tensor<Tensor>(shape_d, data_type, 1, 4);
-
-        // Create and configure function
-        gemm_layer = std::unique_ptr<Function>(new Function());
-        gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta);
-
-        // Allocate tensors
-        a.allocator()->allocate();
-        b.allocator()->allocate();
-        c.allocator()->allocate();
-        d.allocator()->allocate();
-    }
-
-    void TearDown(::benchmark::State &state) override
-    {
-        gemm_layer.reset();
-
-        a.allocator()->free();
-        b.allocator()->free();
-        c.allocator()->free();
-        d.allocator()->free();
-
-        profiler.submit(state);
-    }
-
-    std::unique_ptr<Function> gemm_layer{ nullptr };
-    Profiler                  profiler{};
-
-private:
-    TensorType a{};
-    TensorType b{};
-    TensorType c{};
-    TensorType d{};
-};
-} // namespace benchmark
-} // namespace test
-} // namespace arm_compute
-#endif //__ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__
diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp
deleted file mode 100644
index 8ea8c95cda..0000000000
--- a/tests/benchmark/NEON/NormalizationLayer.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/NormalizationLayer.h"
-
-namespace
-{
-using NormalizationLayerAlexNetF32 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>;
-using NormalizationLayerAlexNetQS8 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer, DataType::QS8>;
-using NormalizationLayerGoogLeNet  = NormalizationLayer<GoogLeNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>;
-} // namespace
-
-// F32
-BENCHMARK_DEFINE_F(NormalizationLayerAlexNetF32, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        norm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>);
-
-// QS8
-BENCHMARK_DEFINE_F(NormalizationLayerAlexNetQS8, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        norm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        norm_layer->run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>);
diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp
deleted file mode 100644
index dfa75497cd..0000000000
--- a/tests/benchmark/NEON/PoolingLayer.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Globals.h"
-#include "NEON/NEAccessor.h"
-#include "TensorLibrary.h"
-#include "benchmark/Datasets.h"
-#include "benchmark/Profiler.h"
-#include "benchmark/WallClockTimer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-
-#include "benchmark/benchmark_api.h"
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::benchmark;
-using namespace arm_compute::test::neon;
-
-#include "benchmark/common/PoolingLayer.h"
-
-namespace
-{
-using PoolingLayerAlexNetF32 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>;
-using PoolingLayerAlexNetQS8 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer, DataType::QS8>;
-using PoolingLayerLeNet5     = PoolingLayer<LeNet5PoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>;
-using PoolingLayerGoogLeNet  = PoolingLayer<GoogLeNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>;
-} // namespace
-
-// F32
-BENCHMARK_DEFINE_F(PoolingLayerAlexNetF32, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        pool_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>);
-
-// QS8
-BENCHMARK_DEFINE_F(PoolingLayerAlexNetQS8, neon_alexnet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        pool_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet)
-->Threads(1)
-->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(PoolingLayerLeNet5, neon_lenet5)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        pool_layer.run();
-        profiler.stop();
-    }
-}
-
-BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5)
-->Threads(1)
-->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>);
-
-BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, neon_googlenet)
-(::benchmark::State &state)
-{
-    while(state.KeepRunning())
-    {
-        // Run function
-        profiler.start();
-        pool_layer.run();
-        profiler.stop();
-    }
-}
-
-// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>);
-BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet)
-->Threads(1)
-->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 4, 8>);