diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-09-04 18:44:23 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 13:03:09 +0100 |
commit | 6ff3b19ee6120edf015fad8caab2991faa3070af (patch) | |
tree | a7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /tests/benchmark | |
download | ComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz |
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'tests/benchmark')
44 files changed, 5730 insertions, 0 deletions
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp new file mode 100644 index 0000000000..5180d3d900 --- /dev/null +++ b/tests/benchmark/CL/ActivationLayer.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/ActivationLayer.h" + +namespace +{ +using ActivationLayerAlexNet = ActivationLayer<AlexNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; +using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; +using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, CLTensor, CLAccessor, CLActivationLayer>; +} // namespace + +BENCHMARK_DEFINE_F(ActivationLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ActivationLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>); diff --git a/tests/benchmark/CL/BitwiseAnd.cpp b/tests/benchmark/CL/BitwiseAnd.cpp new file mode 100644 index 0000000000..a3deb3eb5b --- /dev/null +++ b/tests/benchmark/CL/BitwiseAnd.cpp 
@@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" + +#include "benchmark/benchmark_api.h" + +#include <memory> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +namespace +{ +template <typename DataSet> +class BitwiseAnd : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + ::benchmark::Fixture::SetUp(state); + + profiler.add(std::make_shared<WallClockTimer>()); + + const std::string image_name = *(DataSet().begin() + state.range(0)); + + // Create tensors + src1 = create_tensor(image_name, DataType::U8); + src2 = create_tensor(image_name, DataType::U8); + dst = create_tensor(image_name, DataType::U8); + + // Create and configure function + band.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill source tensors + library->fill(CLAccessor(src1), image_name, Channel::R); + library->fill(CLAccessor(src2), image_name, Channel::G); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + + ::benchmark::Fixture::TearDown(state); + } + + CLBitwiseAnd band{}; + Profiler profiler{}; + +private: + CLTensor src1{}; + CLTensor src2{}; + CLTensor dst{}; +}; + +using BitwiseAndSmall = BitwiseAnd<SmallImages>; +using BitwiseAndLarge = BitwiseAnd<LargeImages>; +} // namespace + +BENCHMARK_DEFINE_F(BitwiseAndSmall, cl_bitwise_and) 
+(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndSmall, cl_bitwise_and) +->Threads(1) +->Apply(DataSetArgs<SmallImages>); + +BENCHMARK_DEFINE_F(BitwiseAndLarge, cl_bitwise_and) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndLarge, cl_bitwise_and) +->Threads(1) +->Apply(DataSetArgs<LargeImages>); diff --git a/tests/benchmark/CL/CMakeLists.txt b/tests/benchmark/CL/CMakeLists.txt new file mode 100644 index 0000000000..8493309f40 --- /dev/null +++ b/tests/benchmark/CL/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+cmake_minimum_required (VERSION 3.1) + +include_directories(${CMAKE_SOURCE_DIR}/../include) + +set(arm_compute_test_benchmark_TARGET_DEFINITIONS + ${arm_compute_test_benchmark_TARGET_DEFINITIONS} + -DOPENCL + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_TARGET_INCLUDES + ${arm_compute_test_benchmark_TARGET_INCLUDES} + ${CMAKE_SOURCE_DIR}/../include + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_OPENCL_SOURCE_FILES + ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h + ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp +) + +add_library(arm_compute_test_benchmark_OPENCL OBJECT + ${arm_compute_test_benchmark_OPENCL_SOURCE_FILES} +) + +set(arm_compute_test_benchmark_TARGET_OBJECTS + ${arm_compute_test_benchmark_TARGET_OBJECTS} + $<TARGET_OBJECTS:arm_compute_test_benchmark_OPENCL> + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_TARGET_LIBRARIES + ${arm_compute_test_benchmark_TARGET_LIBRARIES} + OpenCL + PARENT_SCOPE +) diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp new file mode 100644 index 0000000000..e1f4fabdc3 --- /dev/null +++ b/tests/benchmark/CL/ConvolutionLayer.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/ConvolutionLayer.h" + +namespace +{ +using ConvolutionLayerAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; +using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, CLTensor, CLAccessor, CLConvolutionLayer>; +using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, CLTensor, CLAccessor, CLConvolutionLayer>; +using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, CLTensor, CLAccessor, CLConvolutionLayer>; +} // namespace + +BENCHMARK_DEFINE_F(ConvolutionLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + 
profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) 
+->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 
4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>); diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..6e8c89fa0b --- /dev/null +++ b/tests/benchmark/CL/FullyConnectedLayer.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" + +#include "benchmark/benchmark_api.h" + +#include <memory> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/FullyConnectedLayer.h" + +namespace +{ +using FullyConnectedLayerAlexNet = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; +using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; +using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, CLTensor, CLAccessor, CLFullyConnectedLayer>; +} // namespace + +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); + +BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, cl_lenet5) 
+(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>); diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp new file mode 100644 index 0000000000..b90556df48 --- /dev/null +++ b/tests/benchmark/CL/GEMM.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/CL/GEMM.h" + +namespace +{ +using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>; +using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>; +using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>; +using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>; +} // namespace + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) 
+->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); 
+BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + 
gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); 
+BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) 
+->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); diff --git a/tests/benchmark/CL/GEMM.h b/tests/benchmark/CL/GEMM.h new file mode 100644 index 0000000000..02a339609c --- /dev/null +++ b/tests/benchmark/CL/GEMM.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/GEMMDataset.h" + +#include <memory> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +// FIXME: Merge with NEON/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM +template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type> +class GEMM : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32, "Unsupported data type for GEMM operation"); + + profiler.add(std::make_shared<WallClockTimer>()); + + const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); + + TensorShape shape_a = gemm_obj.shape_a; + TensorShape shape_b = gemm_obj.shape_b; + TensorShape shape_c = gemm_obj.shape_c; + TensorShape shape_d = gemm_obj.shape_d; + + // Create tensors + a = create_tensor(shape_a, data_type); + b = create_tensor(shape_b, data_type); + c = create_tensor(shape_c, data_type); + d = create_tensor(shape_d, data_type); + + // Create and configure function + gemm_layer = std::unique_ptr<Function>(new Function()); + gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + d.allocator()->allocate(); + } + + void TearDown(::benchmark::State &state) override + { + gemm_layer.reset(); + + a.allocator()->free(); + b.allocator()->free(); + c.allocator()->free(); + d.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr<Function> gemm_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType a{}; + TensorType b{}; + 
TensorType c{}; + TensorType d{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__ diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp new file mode 100644 index 0000000000..81d3c65912 --- /dev/null +++ b/tests/benchmark/CL/NormalizationLayer.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/NormalizationLayer.h" + +namespace +{ +using NormalizationLayerAlexNet = NormalizationLayer<AlexNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; +using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, CLTensor, CLAccessor, CLNormalizationLayer>; + +} // namespace + +BENCHMARK_DEFINE_F(NormalizationLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 
8>); +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>); diff --git a/tests/benchmark/CL/PoolingLayer.cpp b/tests/benchmark/CL/PoolingLayer.cpp new file mode 100644 index 0000000000..5285f279e7 --- /dev/null +++ b/tests/benchmark/CL/PoolingLayer.cpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/PoolingLayer.h" + +namespace +{ +using PoolingLayerAlexNet = PoolingLayer<AlexNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; +using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; +using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, CLTensor, CLAccessor, CLPoolingLayer>; +} // namespace + +BENCHMARK_DEFINE_F(PoolingLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); + +BENCHMARK_DEFINE_F(PoolingLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + 
+BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 
4, 8>); diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt new file mode 100644 index 0000000000..115333a1b0 --- /dev/null +++ b/tests/benchmark/CMakeLists.txt @@ -0,0 +1,100 @@ +# Copyright (c) 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+cmake_minimum_required (VERSION 3.1) + +add_library(benchmark STATIC IMPORTED) +set_target_properties(benchmark PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libbenchmark.a" +) + +add_library(OpenCL SHARED IMPORTED) +set_target_properties(OpenCL PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../build/opencl-1.2-stubs/libOpenCL.so" + IMPORTED_NO_SONAME 1 +) + +option(ENABLE_PMU_COUNTER "Compile with PMU counter support") + +set(ARM_COMPUTE_TARGETS_TO_MEASURE "all" CACHE STRING "Semicolon-separated list of targets to include in validation.") + +set(ARM_COMPUTE_ALL_TARGETS + NEON + CL +) + +if(ARM_COMPUTE_TARGETS_TO_MEASURE STREQUAL "all") + set(ARM_COMPUTE_TARGETS_TO_MEASURE ${ARM_COMPUTE_ALL_TARGETS}) +endif() + +list(REMOVE_DUPLICATES ARM_COMPUTE_TARGETS_TO_MEASURE) + +foreach(TARGET ${ARM_COMPUTE_TARGETS_TO_MEASURE}) + list(FIND ARM_COMPUTE_ALL_TARGETS ${TARGET} idx) + + if(${idx} LESS 0) + message(FATAL_ERROR "The target '${TARGET}' does not exist. 
It should be one of\n${ARM_COMPUTE_ALL_TARGETS}") + else() + add_subdirectory(${TARGET}) + endif() +endforeach() + +set(arm_compute_test_benchmark_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Datasets.h + ${CMAKE_CURRENT_SOURCE_DIR}/Instrument.h + ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.h + ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.h + ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.h + ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.h + ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.cpp +) + +if(${ENABLE_PMU_COUNTER}) + list(APPEND arm_compute_test_benchmark_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.h + ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.cpp + ) +endif() + +add_library(arm_compute_test_benchmark OBJECT + ${arm_compute_test_benchmark_SOURCE_FILES} +) + +add_definitions(${arm_compute_test_benchmark_TARGET_DEFINITIONS}) +include_directories(${arm_compute_test_benchmark_TARGET_INCLUDES}) + +add_executable(arm_compute_benchmark + $<TARGET_OBJECTS:arm_compute_test_benchmark> + ${arm_compute_test_benchmark_TARGET_OBJECTS} + $<TARGET_OBJECTS:tensor_library> + $<TARGET_OBJECTS:arm_compute_test> +) + +target_link_libraries(arm_compute_benchmark + benchmark + boost_program_options + arm_compute + ${arm_compute_test_benchmark_TARGET_LIBRARIES} +) diff --git a/tests/benchmark/Datasets.h b/tests/benchmark/Datasets.h new file mode 100644 index 0000000000..e7bfb6f10f --- /dev/null +++ b/tests/benchmark/Datasets.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__ + +#include "dataset/ActivationLayerDataset.h" +#include "dataset/BorderModeDataset.h" +#include "dataset/ConvolutionLayerDataset.h" +#include "dataset/DataTypeDatasets.h" +#include "dataset/FullyConnectedLayerDataset.h" +#include "dataset/GEMMDataset.h" +#include "dataset/ImageDatasets.h" +#include "dataset/InterpolationPolicyDataset.h" +#include "dataset/NormalizationLayerDataset.h" +#include "dataset/PoolingLayerDataset.h" +#include "dataset/ShapeDatasets.h" + +#include "benchmark/benchmark_api.h" + +#include <array> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template <typename DataSet, int N> +void DataSetArg(::benchmark::internal::Benchmark *b) +{ + b->Arg(N); + b->ArgName(std::string(*(DataSet().begin() + N))); +} + +template <typename DataSet, int N, unsigned int... Args> +void DataSetArgBatched(::benchmark::internal::Benchmark *b) +{ + constexpr std::array<unsigned int, sizeof...(Args)> batches{ { Args... } }; + for(const auto &el : batches) + { + b->Args({ N, static_cast<int>(el) }); + } + b->ArgNames({ std::string(*(DataSet().begin() + N)), "batch_size" }); +} + +template <typename DataSet> +void DataSetArgs(::benchmark::internal::Benchmark *b) +{ + for(size_t i = 0; i < DataSet().size(); ++i) + { + b->Arg(i); + b->ArgName(*(DataSet().begin() + i)); + } +} +} +} +} +#endif diff --git a/tests/benchmark/Instrument.h b/tests/benchmark/Instrument.h new file mode 100644 index 0000000000..39b0088670 --- /dev/null +++ b/tests/benchmark/Instrument.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__ + +#include "Utils.h" + +#include <memory> +#include <string> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Interface for classes that can be used to measure performance. */ +class Instrument +{ +public: + /** Interface defining a measurement, e.g. time, cycles, ... */ + class IMeasurement + { + public: + IMeasurement() = default; + IMeasurement(const IMeasurement &) = default; + IMeasurement(IMeasurement &&) = default; + IMeasurement &operator=(const IMeasurement &) = default; + IMeasurement &operator=(IMeasurement &&) = default; + virtual ~IMeasurement() = default; + + virtual operator double() const = 0; + }; + + /** Implementation of a Measurement class for arihtmetic types. 
*/ + template <typename T> + class Measurement : public IMeasurement + { + public: + /** Store the given value as measurement. + * + * @param[in] value Measured value. + */ + Measurement(T value); + + operator double() const override; + + private: + T _value; + }; + + Instrument() = default; + Instrument(const Instrument &) = default; + Instrument(Instrument &&) = default; + Instrument &operator=(const Instrument &) = default; + Instrument &operator=(Instrument &&) = default; + virtual ~Instrument() = default; + + /** Identifier for the instrument */ + virtual std::string id() const = 0; + + /** Start measuring. */ + virtual void start() = 0; + + /** Stop measuring. */ + virtual void stop() = 0; + + /** Return the latest measurement. */ + virtual std::unique_ptr<IMeasurement> get_measurement() const = 0; +}; + +template <typename T> +Instrument::Measurement<T>::Measurement(T value) + : _value{ value } +{ +} + +template <typename T> +Instrument::Measurement<T>::operator double() const +{ + return _value; +} +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp new file mode 100644 index 0000000000..8faed9f831 --- /dev/null +++ b/tests/benchmark/NEON/ActivationLayer.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/ActivationLayer.h" + +namespace +{ +using ActivationLayerAlexNetF32 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer>; +using ActivationLayerAlexNetQS8 = ActivationLayer<AlexNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::QS8>; +using ActivationLayerLeNet5 = ActivationLayer<LeNet5ActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>; +using ActivationLayerGoogLeNet = ActivationLayer<GoogLeNetActivationLayerDataset, Tensor, NEAccessor, NEActivationLayer, DataType::F32>; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(ActivationLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) 
+->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); + +// QS8 +BENCHMARK_DEFINE_F(ActivationLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetActivationLayerDataset, 4, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ActivationLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ActivationLayerDataset, 0, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 16, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 17, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 18, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 19, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 20, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 21, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 22, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 23, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 24, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 25, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 26, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 27, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 28, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 29, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 30, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 31, 1, 4, 8>); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetActivationLayerDataset, 32, 1, 4, 8>); diff --git a/tests/benchmark/NEON/BitwiseAnd.cpp b/tests/benchmark/NEON/BitwiseAnd.cpp new file mode 100644 index 0000000000..dba3d1ebea --- /dev/null +++ b/tests/benchmark/NEON/BitwiseAnd.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +#include <memory> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +namespace +{ +template <typename DataSet> +class BitwiseAnd : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared<WallClockTimer>()); + + const std::string image_name = *(DataSet().begin() + state.range(0)); + + // Create tensors + src1 = create_tensor(image_name, DataType::U8); + src2 = create_tensor(image_name, DataType::U8); + dst = create_tensor(image_name, DataType::U8); + + // Create and configure function + band.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill source tensors + library->fill(NEAccessor(src1), image_name, Channel::R); + library->fill(NEAccessor(src2), image_name, Channel::G); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + } + + NEBitwiseAnd band{}; + Profiler profiler{}; + +private: + Tensor src1{}; + Tensor src2{}; + Tensor dst{}; +}; + +using BitwiseAndSmall = 
BitwiseAnd<SmallImages>; +using BitwiseAndLarge = BitwiseAnd<LargeImages>; +} // namespace + +BENCHMARK_DEFINE_F(BitwiseAndSmall, neon_bitwise_and) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndSmall, neon_bitwise_and) +->Threads(1) +->Apply(DataSetArgs<SmallImages>); + +BENCHMARK_DEFINE_F(BitwiseAndLarge, neon_bitwise_and) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndLarge, neon_bitwise_and) +->Threads(1) +->Apply(DataSetArgs<LargeImages>); diff --git a/tests/benchmark/NEON/CMakeLists.txt b/tests/benchmark/NEON/CMakeLists.txt new file mode 100644 index 0000000000..2cb3eb36c9 --- /dev/null +++ b/tests/benchmark/NEON/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright (c) 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +cmake_minimum_required (VERSION 3.1) + +set(arm_compute_test_benchmark_NEON_SOURCE_FILES + ${CMAKE_SOURCE_DIR}/NEON/NEAccessor.h + ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp +) + +add_library(arm_compute_test_benchmark_NEON OBJECT + ${arm_compute_test_benchmark_NEON_SOURCE_FILES} +) + +SET(arm_compute_test_benchmark_TARGET_OBJECTS + ${arm_compute_test_benchmark_TARGET_OBJECTS} + $<TARGET_OBJECTS:arm_compute_test_benchmark_NEON> + PARENT_SCOPE +) diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp new file mode 100644 index 0000000000..0cfff8494b --- /dev/null +++ b/tests/benchmark/NEON/ConvolutionLayer.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/ConvolutionLayer.h" + +namespace +{ +using ConvolutionLayerAlexNetF32 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>; +using ConvolutionLayerAlexNetQS8 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer, DataType::QS8>; +using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>; +using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, Tensor, NEAccessor, NEConvolutionLayer>; +using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, Tensor, NEAccessor, NEConvolutionLayer>; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) +->Threads(1) 
+->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); + +// QS8 +BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 
0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>); diff --git a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp new file mode 100644 index 0000000000..bc56e844d8 --- /dev/null +++ b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" +#include "dataset/ConvolutionLayerDataset.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/ConvolutionLayer.h" + +namespace +{ +using ConvolutionLayerDirectAlexNet = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEDirectConvolutionLayer>; +} // namespace + +BENCHMARK_DEFINE_F(ConvolutionLayerDirectAlexNet, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + profiler.stop(); + } +} + +// Registr only the 3x3 convolution layers +BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>); diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..85979203ac --- /dev/null +++ b/tests/benchmark/NEON/FullyConnectedLayer.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/FullyConnectedLayer.h" + +namespace +{ +using FullyConnectedLayerAlexNetF32 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; +using FullyConnectedLayerAlexNetQS8 = FullyConnectedLayer<AlexNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer, DataType::QS8>; +using FullyConnectedLayerLeNet5 = FullyConnectedLayer<LeNet5FullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; +using FullyConnectedLayerGoogLeNet = FullyConnectedLayer<GoogLeNetFullyConnectedLayerDataset, Tensor, NEAccessor, NEFullyConnectedLayer>; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); + 
+// QS8 +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetFullyConnectedLayerDataset, 2, 1, 4, 8>); + +BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5FullyConnectedLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} +BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetFullyConnectedLayerDataset, 0, 1, 4, 8>); diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp new file mode 100644 index 0000000000..9190309f1c --- /dev/null +++ b/tests/benchmark/NEON/GEMM.cpp @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/NEON/GEMM.h" + +namespace +{ +#ifdef ENABLE_FP16 +using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F16>; +using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F16>; +#endif /* ENABLE_FP16 */ +using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F32>; +using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F32>; +using GEMMQS8GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::QS8>; +using GEMMQS8GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::QS8>; +} // namespace +#ifdef ENABLE_FP16 +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); 
+BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) 
+->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); 
+BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); +#endif /* ENABLE_FP16 */ + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + 
profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, 
neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); 
+BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); + +BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + 
profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>); 
+BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); diff --git 
a/tests/benchmark/NEON/GEMM.h b/tests/benchmark/NEON/GEMM.h new file mode 100644 index 0000000000..24d196523f --- /dev/null +++ b/tests/benchmark/NEON/GEMM.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/GEMMDataset.h" + +#include <memory> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +// FIXME: Merge with CL/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM +template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType data_type> +class GEMM : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { +#ifdef ENABLE_FP16 + ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); +#else /* ENABLE_FP16 */ + ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); +#endif /* ENABLE_FP16 */ + + profiler.add(std::make_shared<WallClockTimer>()); + + const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); + + TensorShape shape_a = gemm_obj.shape_a; + TensorShape shape_b = gemm_obj.shape_b; + TensorShape shape_c = gemm_obj.shape_c; + TensorShape shape_d = gemm_obj.shape_d; + + // Create tensors + a = create_tensor(shape_a, data_type, 1, 4); + b = create_tensor(shape_b, data_type, 1, 4); + c = create_tensor(shape_c, data_type, 1, 4); + d = create_tensor(shape_d, data_type, 1, 4); + + // Create and configure function + gemm_layer = std::unique_ptr<Function>(new Function()); + gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + d.allocator()->allocate(); + } + + void TearDown(::benchmark::State &state) override + { + gemm_layer.reset(); + + 
a.allocator()->free(); + b.allocator()->free(); + c.allocator()->free(); + d.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr<Function> gemm_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType a{}; + TensorType b{}; + TensorType c{}; + TensorType d{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp new file mode 100644 index 0000000000..46dc56b84d --- /dev/null +++ b/tests/benchmark/NEON/NormalizationLayer.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/NormalizationLayer.h" + +namespace +{ +using NormalizationLayerAlexNetF32 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>; +using NormalizationLayerAlexNetQS8 = NormalizationLayer<AlexNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer, DataType::QS8>; +using NormalizationLayerGoogLeNet = NormalizationLayer<GoogLeNetNormalizationLayerDataset, Tensor, NEAccessor, NENormalizationLayer>; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(NormalizationLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); + +// QS8 +BENCHMARK_DEFINE_F(NormalizationLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) +->Threads(1) 
+->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetNormalizationLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetNormalizationLayerDataset, 1, 1, 4, 8>); diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp new file mode 100644 index 0000000000..9b071317b4 --- /dev/null +++ b/tests/benchmark/NEON/PoolingLayer.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/PoolingLayer.h" + +namespace +{ +using PoolingLayerAlexNetF32 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; +using PoolingLayerAlexNetQS8 = PoolingLayer<AlexNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer, DataType::QS8>; +using PoolingLayerLeNet5 = PoolingLayer<LeNet5PoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; +using PoolingLayerGoogLeNet = PoolingLayer<GoogLeNetPoolingLayerDataset, Tensor, NEAccessor, NEPoolingLayer>; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(PoolingLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); + +// QS8 +BENCHMARK_DEFINE_F(PoolingLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched<AlexNetPoolingLayerDataset, 2, 1, 4, 8>); + +BENCHMARK_DEFINE_F(PoolingLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched<LeNet5PoolingLayerDataset, 1, 1, 4, 8>); + +BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 0, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 1, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 2, 1, 4, 8>); 
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 3, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 4, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 5, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 6, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 7, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 8, 1, 4, 8>); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched<GoogLeNetPoolingLayerDataset, 9, 1, 4, 8>); diff --git a/tests/benchmark/PMUCounter.cpp b/tests/benchmark/PMUCounter.cpp new file mode 100644 index 0000000000..e87dae82e6 --- /dev/null +++ b/tests/benchmark/PMUCounter.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "PMUCounter.h" + +#include "Utils.h" + +#define _GNU_SOURCE 1 +#include <asm/unistd.h> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <fcntl.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <stdexcept> +#include <sys/ioctl.h> +#include <unistd.h> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +CycleCounter::CycleCounter() +{ + const pid_t pid = getpid(); + + struct perf_event_attr perf_config + { + }; + memset(&perf_config, 0, sizeof(struct perf_event_attr)); + + perf_config.config = PERF_COUNT_HW_CPU_CYCLES; + perf_config.size = sizeof(struct perf_event_attr); + perf_config.type = PERF_TYPE_HARDWARE; + // The inherit bit specifies that this counter should count events of child + // tasks as well as the task specified + perf_config.inherit = 1; + // Enables saving of event counts on context switch for inherited tasks + perf_config.inherit_stat = 1; + + _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); + + if(_fd < 0) + { + throw std::runtime_error("perf_event_open for cycles failed"); + } +} + +std::string CycleCounter::id() const +{ + return "Cycle Counter"; +} + +void CycleCounter::start() +{ + ioctl(_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); +} + +void CycleCounter::stop() +{ + ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); + read(_fd, &_cycles, sizeof(_cycles)); +} + +std::unique_ptr<Instrument::IMeasurement> 
CycleCounter::get_measurement() const +{ + return ::arm_compute::test::cpp14::make_unique<Instrument::Measurement<long long>>(_cycles); +} + +InstructionCounter::InstructionCounter() +{ + const pid_t pid = getpid(); + + struct perf_event_attr perf_config + { + }; + memset(&perf_config, 0, sizeof(struct perf_event_attr)); + + perf_config.config = PERF_COUNT_HW_INSTRUCTIONS; + perf_config.size = sizeof(struct perf_event_attr); + perf_config.type = PERF_TYPE_HARDWARE; + // The inherit bit specifies that this counter should count events of child + // tasks as well as the task specified + perf_config.inherit = 1; + // Enables saving of event counts on context switch for inherited tasks + perf_config.inherit_stat = 1; + + _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); + + if(_fd < 0) + { + throw std::runtime_error("perf_event_open for instructions failed"); + } +} + +std::string InstructionCounter::id() const +{ + return "Instruction Counter"; +} + +void InstructionCounter::start() +{ + ioctl(_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); +} + +void InstructionCounter::stop() +{ + ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); + read(_fd, &_instructions, sizeof(_instructions)); +} + +std::unique_ptr<Instrument::IMeasurement> InstructionCounter::get_measurement() const +{ + return std::unique_ptr<Instrument::IMeasurement>(new Instrument::Measurement<long long>(_instructions)); +} +} // namespace benchmark +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/PMUCounter.h b/tests/benchmark/PMUCounter.h new file mode 100644 index 0000000000..de45f319f6 --- /dev/null +++ b/tests/benchmark/PMUCounter.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__ + +#include "Instrument.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Implementation of an instrument to count CPU cycles. */ +class CycleCounter : public Instrument +{ +public: + /** Initialise the cycle counter. */ + CycleCounter(); + + std::string id() const override; + void start() override; + void stop() override; + std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; + +private: + long _fd{ -1 }; + long long _cycles{ 0 }; +}; + +/** Implementation of an instrument to count executed CPU instructions. */ +class InstructionCounter : public Instrument +{ +public: + /** Initialise the instruction counter. 
*/ + InstructionCounter(); + + std::string id() const override; + void start() override; + void stop() override; + std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; + +private: + long _fd{ -1 }; + long long _instructions{ 0 }; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/PerformanceProgramOptions.cpp b/tests/benchmark/PerformanceProgramOptions.cpp new file mode 100644 index 0000000000..b4becc3c69 --- /dev/null +++ b/tests/benchmark/PerformanceProgramOptions.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "PerformanceProgramOptions.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Weffc++" +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wctor-dtor-privacy" +#include "boost/program_options.hpp" +#pragma GCC diagnostic pop + +namespace arm_compute +{ +namespace test +{ +namespace performance +{ +PerformanceProgramOptions::PerformanceProgramOptions() +{ + boost::program_options::options_description options("Performance options"); + options.add_options()("runs", boost::program_options::value<unsigned int>()->default_value(1), "Repetitions per test"); + options.add_options()("threads", boost::program_options::value<unsigned int>()->default_value(1), "Number of parallel CPU threads"); + add_options(options); +} +} // namespace performance +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/PerformanceProgramOptions.h b/tests/benchmark/PerformanceProgramOptions.h new file mode 100644 index 0000000000..671e263bb2 --- /dev/null +++ b/tests/benchmark/PerformanceProgramOptions.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__ +#define __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__ + +#include "ProgramOptions.h" + +namespace arm_compute +{ +namespace test +{ +namespace performance +{ +/** Subclass of @ref ProgramOptions that adds performance specific options. */ +class PerformanceProgramOptions : public ProgramOptions +{ +public: + /** Defines additonal options. */ + PerformanceProgramOptions(); +}; +} // namespace performance +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/PerformanceUserConfiguration.cpp b/tests/benchmark/PerformanceUserConfiguration.cpp new file mode 100644 index 0000000000..ca412d660a --- /dev/null +++ b/tests/benchmark/PerformanceUserConfiguration.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "PerformanceUserConfiguration.h" + +#include "ProgramOptions.h" + +namespace arm_compute +{ +namespace test +{ +namespace performance +{ +PerformanceUserConfiguration::PerformanceUserConfiguration(const ProgramOptions &options) + : UserConfiguration(options) +{ + unsigned int tmp_runs = 0; + if(options.get("runs", tmp_runs)) + { + runs = tmp_runs; + } +} +} // namespace performance +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/PerformanceUserConfiguration.h b/tests/benchmark/PerformanceUserConfiguration.h new file mode 100644 index 0000000000..a140d404c8 --- /dev/null +++ b/tests/benchmark/PerformanceUserConfiguration.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ +#define __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ + +#include "UserConfiguration.h" + +namespace arm_compute +{ +namespace test +{ +class ProgramOptions; + +namespace performance +{ +/** Specialisation of @ref UserConfiguration to provide performance specific + * configuration options. + */ +struct PerformanceUserConfiguration : public UserConfiguration +{ + PerformanceUserConfiguration() = default; + + /** Initialise the configuration according to the program options. + * + * @param[in] options Parsed command line options. + */ + PerformanceUserConfiguration(const ProgramOptions &options); + + Option<unsigned int> runs{}; +}; +} // namespace performance + +extern performance::PerformanceUserConfiguration user_config; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/Profiler.cpp b/tests/benchmark/Profiler.cpp new file mode 100644 index 0000000000..f3ce94164f --- /dev/null +++ b/tests/benchmark/Profiler.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Profiler.h" + +#include <iostream> +#include <utility> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +void Profiler::add(const std::shared_ptr<Instrument> &instrument) +{ + _instruments.push_back(instrument); +} + +void Profiler::start() +{ + for(auto &instrument : _instruments) + { + instrument->start(); + } +} + +void Profiler::stop() +{ + for(auto &instrument : _instruments) + { + instrument->stop(); + } + + for(const auto &instrument : _instruments) + { + _measurements[instrument->id()].push_back(*instrument->get_measurement()); + } +} + +void Profiler::submit(::benchmark::State &state) +{ + for(auto &instrument : _measurements) + { + double sum_values = std::accumulate(instrument.second.begin(), instrument.second.end(), 0.); + size_t num_values = instrument.second.size(); + + if(num_values > 2) + { + auto minmax_values = std::minmax_element(instrument.second.begin(), instrument.second.end()); + state.counters[instrument.first + "_min"] = *minmax_values.first; + state.counters[instrument.first + "_max"] = *minmax_values.second; + sum_values -= *minmax_values.first + *minmax_values.second; + num_values -= 2; + } + state.counters[instrument.first] = sum_values / num_values; + instrument.second.clear(); + } +} + +const Profiler::MeasurementsMap &Profiler::measurements() const +{ + return _measurements; +} +} // namespace benchmark +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/Profiler.h b/tests/benchmark/Profiler.h new file mode 100644 index 0000000000..03922f4704 --- /dev/null +++ b/tests/benchmark/Profiler.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__ + +#include "Instrument.h" + +#include "benchmark/benchmark_api.h" + +#include <map> +#include <memory> +#include <string> +#include <vector> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +class Profiler +{ +public: + /** Mapping from instrument ids to their measurements. */ + using MeasurementsMap = std::map<std::string, std::vector<double>>; + + /** Add @p instrument to the performance montior. + * + * All added instruments will be used when @ref start or @ref stop are + * called to make measurements. + * + * @param[in] instrument Instrument to be used to measure performance. + */ + void add(const std::shared_ptr<Instrument> &instrument); + + /** Start all added instruments to measure performance. 
*/ + void start(); + + /** Stop all added instruments. */ + void stop(); + + /** Commit all measured values to the current active test. */ + void submit(::benchmark::State &state); + + /** Return measurements for all instruments. */ + const MeasurementsMap &measurements() const; + +private: + std::vector<std::shared_ptr<Instrument>> _instruments{}; + MeasurementsMap _measurements{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/WallClockTimer.cpp b/tests/benchmark/WallClockTimer.cpp new file mode 100644 index 0000000000..9ab53d0b3c --- /dev/null +++ b/tests/benchmark/WallClockTimer.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "WallClockTimer.h" + +#include "Utils.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +std::string WallClockTimer::id() const +{ + return "Wall clock"; +} + +void WallClockTimer::start() +{ + _start = std::chrono::high_resolution_clock::now(); +} + +void WallClockTimer::stop() +{ + _stop = std::chrono::high_resolution_clock::now(); +} + +std::unique_ptr<Instrument::IMeasurement> WallClockTimer::get_measurement() const +{ + const std::chrono::duration<float, std::milli> delta = _stop - _start; + return ::arm_compute::test::cpp14::make_unique<Instrument::Measurement<float>>(delta.count()); +} +} // namespace benchmark +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/WallClockTimer.h b/tests/benchmark/WallClockTimer.h new file mode 100644 index 0000000000..cf6828e88d --- /dev/null +++ b/tests/benchmark/WallClockTimer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__ + +#include "Instrument.h" + +#include <chrono> + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Implementation of an instrument to measure elapsed wall-clock time in milliseconds. */ +class WallClockTimer : public Instrument +{ +public: + std::string id() const override; + void start() override; + void stop() override; + std::unique_ptr<Instrument::IMeasurement> get_measurement() const override; + +private: + std::chrono::high_resolution_clock::time_point _start{}; + std::chrono::high_resolution_clock::time_point _stop{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/benchmark/common/ActivationLayer.h b/tests/benchmark/common/ActivationLayer.h new file mode 100644 index 0000000000..7edfb6ef3c --- /dev/null +++ b/tests/benchmark/common/ActivationLayer.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__
#define __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__

#include "TensorLibrary.h"
#include "Utils.h"
#include "dataset/ActivationLayerDataset.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;

namespace arm_compute
{
namespace test
{
namespace benchmark
{
/** Google Benchmark fixture measuring the runtime of an activation layer.
 *
 * @tparam DataSet    Dataset providing shape and activation info per test case.
 * @tparam TensorType Concrete tensor type (e.g. Tensor or CLTensor).
 * @tparam Accessor   Accessor type used to fill the tensors.
 * @tparam Function   Activation layer function under test.
 * @tparam dt         Data type used for the tensors (default: F32).
 */
template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
class ActivationLayer : public ::benchmark::Fixture
{
public:
    // state.range(0) selects the dataset entry, state.range(1) the batch size.
    void SetUp(::benchmark::State &state) override
    {
        profiler.add(std::make_shared<WallClockTimer>());

        const ActivationLayerDataObject act_obj = *(DataSet().begin() + state.range(0));

        // Set the batch size in the source and destination shapes by
        // appending it as an extra dimension.
        const unsigned int batches              = state.range(1);
        const unsigned int fixed_point_position = 4; // assumed ignored for floating point types -- TODO confirm
        TensorShape shape                       = act_obj.shape;
        shape.set(shape.num_dimensions(), batches);

        // Create tensors
        src = create_tensor(shape, dt, 1, fixed_point_position);
        dst = create_tensor(shape, dt, 1, fixed_point_position);

        // Create and configure function
        act_layer.configure(&src, &dst, act_obj.info);

        // Allocate tensors
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Fill tensors (only the input needs data; the output is produced by the run)
        library->fill_tensor_uniform(Accessor(src), 0);
    }

    void TearDown(::benchmark::State &state) override
    {
        // Free the tensors and commit the collected measurements to the state.
        src.allocator()->free();
        dst.allocator()->free();

        profiler.submit(state);
    }

    Function act_layer{}; // Function under test; run by the benchmark body.
    Profiler profiler{};  // Collects wall-clock measurements per iteration.

private:
    TensorType src{};
    TensorType dst{};
};
} // namespace benchmark
} // namespace test
} // namespace arm_compute
#endif //__ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__
 */
#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__
#define __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__

#include "TensorLibrary.h"
#include "Utils.h"
#include "dataset/ConvolutionLayerDataset.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;

namespace arm_compute
{
namespace test
{
namespace benchmark
{
/** Google Benchmark fixture measuring the runtime of a convolution layer.
 *
 * @tparam DataSet    Dataset providing shapes and convolution info per test case.
 * @tparam TensorType Concrete tensor type (e.g. Tensor or CLTensor).
 * @tparam Accessor   Accessor type used to fill the tensors.
 * @tparam Function   Convolution layer function under test.
 * @tparam dt         Data type used for the tensors (default: F32).
 */
template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
class ConvolutionLayer : public ::benchmark::Fixture
{
public:
    // state.range(0) selects the dataset entry, state.range(1) the batch size.
    void SetUp(::benchmark::State &state) override
    {
        profiler.add(std::make_shared<WallClockTimer>());

        const ConvolutionLayerDataObject conv_obj = *(DataSet().begin() + state.range(0));

        // Set the batch size in the source and destination shapes
        // (dimension 3 holds the batch count for 4D activation tensors).
        const unsigned int batches              = state.range(1);
        const unsigned int fixed_point_position = 4; // assumed ignored for floating point types -- TODO confirm
        TensorShape src_shape                   = conv_obj.src_shape;
        TensorShape dst_shape                   = conv_obj.dst_shape;
        src_shape.set(3 /* batch */, batches);
        dst_shape.set(3 /* batch */, batches);

        // Create tensors
        src     = create_tensor(src_shape, dt, 1, fixed_point_position);
        weights = create_tensor(conv_obj.weights_shape, dt, 1, fixed_point_position);
        bias    = create_tensor(conv_obj.bias_shape, dt, 1, fixed_point_position);
        dst     = create_tensor(dst_shape, dt, 1, fixed_point_position);

        // Create and configure function
        conv_layer = std::unique_ptr<Function>(new Function());
        conv_layer->configure(&src, &weights, &bias, &dst, conv_obj.info);

        // Allocate tensors
        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // Fill tensors (distinct seeds so the inputs differ from each other)
        library->fill_tensor_uniform(Accessor(src), 0);
        library->fill_tensor_uniform(Accessor(weights), 1);
        library->fill_tensor_uniform(Accessor(bias), 2);
    }

    void TearDown(::benchmark::State &state) override
    {
        // Destroy the function object before the tensors it was configured with are freed.
        conv_layer.reset();

        src.allocator()->free();
        weights.allocator()->free();
        bias.allocator()->free();
        dst.allocator()->free();

        profiler.submit(state);
    }

    std::unique_ptr<Function> conv_layer{ nullptr }; // Function under test; run by the benchmark body.
    Profiler profiler{};                             // Collects wall-clock measurements per iteration.

private:
    TensorType src{};
    TensorType weights{};
    TensorType bias{};
    TensorType dst{};
};
} // namespace benchmark
} // namespace test
} // namespace arm_compute
#endif //__ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__
 */
#ifndef __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__
#define __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__

#include "TensorLibrary.h"
#include "Utils.h"
// NOTE(review): FullyConnectedLayerDataObject is presumably declared via this
// header (or something it includes) -- confirm; a dedicated
// FullyConnectedLayerDataset.h would be the expected include.
#include "dataset/ConvolutionLayerDataset.h"

#include <memory>
#include <string>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;

namespace arm_compute
{
namespace test
{
namespace benchmark
{
/** Google Benchmark fixture measuring the runtime of a fully connected layer.
 *
 * @tparam DataSet    Dataset providing shapes per test case.
 * @tparam TensorType Concrete tensor type (e.g. Tensor or CLTensor).
 * @tparam Accessor   Accessor type used to fill the tensors.
 * @tparam Function   Fully connected layer function under test.
 * @tparam dt         Data type used for the tensors (default: F32).
 */
template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
class FullyConnectedLayer : public ::benchmark::Fixture
{
public:
    // state.range(0) selects the dataset entry, state.range(1) the batch size.
    void SetUp(::benchmark::State &state) override
    {
        profiler.add(std::make_shared<WallClockTimer>());

        const FullyConnectedLayerDataObject fc_obj = *(DataSet().begin() + state.range(0));

        // Set the batch size in the source and destination shapes by
        // appending it as an extra dimension.
        const unsigned int batches              = state.range(1);
        const unsigned int fixed_point_position = 4; // assumed ignored for floating point types -- TODO confirm
        TensorShape src_shape                   = fc_obj.src_shape;
        TensorShape dst_shape                   = fc_obj.dst_shape;
        src_shape.set(src_shape.num_dimensions(), batches);
        dst_shape.set(dst_shape.num_dimensions(), batches);

        // Create tensors
        src     = create_tensor(src_shape, dt, 1, fixed_point_position);
        weights = create_tensor(fc_obj.weights_shape, dt, 1, fixed_point_position);
        bias    = create_tensor(fc_obj.bias_shape, dt, 1, fixed_point_position);
        dst     = create_tensor(dst_shape, dt, 1, fixed_point_position);

        // Create and configure function
        fc_layer = std::unique_ptr<Function>(new Function());
        fc_layer->configure(&src, &weights, &bias, &dst);

        // Allocate tensors
        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // Fill tensors (distinct seeds so the inputs differ from each other)
        library->fill_tensor_uniform(Accessor(src), 0);
        library->fill_tensor_uniform(Accessor(weights), 1);
        library->fill_tensor_uniform(Accessor(bias), 2);
    }

    void TearDown(::benchmark::State &state) override
    {
        // Destroy the function object before the tensors it was configured with are freed.
        fc_layer.reset();

        src.allocator()->free();
        weights.allocator()->free();
        bias.allocator()->free();
        dst.allocator()->free();

        profiler.submit(state);
    }

    std::unique_ptr<Function> fc_layer{ nullptr }; // Function under test; run by the benchmark body.
    Profiler profiler{};                           // Collects wall-clock measurements per iteration.

private:
    TensorType src{};
    TensorType weights{};
    TensorType bias{};
    TensorType dst{};
};
} // namespace benchmark
} // namespace test
} // namespace arm_compute
#endif //__ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__
 */
#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__
#define __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__

#include "TensorLibrary.h"
#include "Utils.h"
#include "dataset/NormalizationLayerDataset.h"

#include <memory>
#include <string>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;

namespace arm_compute
{
namespace test
{
namespace benchmark
{
/** Google Benchmark fixture measuring the runtime of a normalization layer.
 *
 * @tparam DataSet    Dataset providing shape and normalization info per test case.
 * @tparam TensorType Concrete tensor type (e.g. Tensor or CLTensor).
 * @tparam Accessor   Accessor type used to fill the tensors.
 * @tparam Function   Normalization layer function under test.
 * @tparam dt         Data type used for the tensors (default: F32).
 */
template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
class NormalizationLayer : public ::benchmark::Fixture
{
public:
    // state.range(0) selects the dataset entry, state.range(1) the batch size.
    void SetUp(::benchmark::State &state) override
    {
        profiler.add(std::make_shared<WallClockTimer>());

        const NormalizationLayerDataObject norm_obj = *(DataSet().begin() + state.range(0));

        // Set the batch size in the source and destination shapes by
        // appending it as an extra dimension.
        const unsigned int batches              = state.range(1);
        const unsigned int fixed_point_position = 4; // assumed ignored for floating point types -- TODO confirm
        TensorShape shape                       = norm_obj.shape;
        shape.set(shape.num_dimensions(), batches);

        // Create tensors
        src = create_tensor(shape, dt, 1, fixed_point_position);
        dst = create_tensor(shape, dt, 1, fixed_point_position);

        // Create and configure function
        norm_layer = std::unique_ptr<Function>(new Function());
        norm_layer->configure(&src, &dst, norm_obj.info);

        // Allocate tensors
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Fill tensors (only the input needs data; the output is produced by the run)
        library->fill_tensor_uniform(Accessor(src), 0);
    }

    void TearDown(::benchmark::State &state) override
    {
        // Destroy the function object before the tensors it was configured with are freed.
        norm_layer.reset();

        src.allocator()->free();
        dst.allocator()->free();

        profiler.submit(state);
    }

    std::unique_ptr<Function> norm_layer{ nullptr }; // Function under test; run by the benchmark body.
    Profiler profiler{};                             // Collects wall-clock measurements per iteration.

private:
    TensorType src{};
    TensorType dst{};
};
} // namespace benchmark
} // namespace test
} // namespace arm_compute
#endif //__ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__
b/tests/benchmark/common/PoolingLayer.h new file mode 100644 index 0000000000..5bb332fd6b --- /dev/null +++ b/tests/benchmark/common/PoolingLayer.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 */
#ifndef __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__
#define __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__

#include "TensorLibrary.h"
#include "Utils.h"
#include "dataset/PoolingLayerDataset.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;

namespace arm_compute
{
namespace test
{
namespace benchmark
{
/** Google Benchmark fixture measuring the runtime of a pooling layer.
 *
 * @tparam DataSet    Dataset providing shapes and pooling info per test case.
 * @tparam TensorType Concrete tensor type (e.g. Tensor or CLTensor).
 * @tparam Accessor   Accessor type used to fill the tensors.
 * @tparam Function   Pooling layer function under test.
 * @tparam dt         Data type used for the tensors (default: F32).
 */
template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
class PoolingLayer : public ::benchmark::Fixture
{
public:
    // state.range(0) selects the dataset entry, state.range(1) the batch size.
    void SetUp(::benchmark::State &state) override
    {
        profiler.add(std::make_shared<WallClockTimer>());

        const PoolingLayerDataObject pool_obj = *(DataSet().begin() + state.range(0));

        // Set the batch size in the source and destination shapes by
        // appending it as an extra dimension.
        const unsigned int batches              = state.range(1);
        const unsigned int fixed_point_position = 4; // assumed ignored for floating point types -- TODO confirm
        TensorShape src_shape                   = pool_obj.src_shape;
        TensorShape dst_shape                   = pool_obj.dst_shape;
        src_shape.set(src_shape.num_dimensions(), batches);
        dst_shape.set(dst_shape.num_dimensions(), batches);

        // Create tensors
        src = create_tensor(src_shape, dt, 1, fixed_point_position);
        dst = create_tensor(dst_shape, dt, 1, fixed_point_position);

        // Create and configure function
        pool_layer.configure(&src, &dst, pool_obj.info);

        // Allocate tensors
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Fill tensors (only the input needs data; the output is produced by the run)
        library->fill_tensor_uniform(Accessor(src), 0);
    }

    void TearDown(::benchmark::State &state) override
    {
        // Free allocators and commit the collected measurements to the state.
        src.allocator()->free();
        dst.allocator()->free();

        profiler.submit(state);
    }

    Function pool_layer{}; // Function under test; run by the benchmark body.
    Profiler profiler{};   // Collects wall-clock measurements per iteration.

private:
    TensorType src{};
    TensorType dst{};
};
} // namespace benchmark
} // namespace test
} // namespace arm_compute
#endif //__ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__
file mode 100644 index 0000000000..acde259d9b --- /dev/null +++ b/tests/benchmark/main.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 */
#include "Globals.h"
#include "PMUCounter.h"
#include "PerformanceProgramOptions.h"
#include "PerformanceUserConfiguration.h"
#include "TensorLibrary.h"
#include "Utils.h"
#include "WallClockTimer.h"

#include "benchmark/benchmark_api.h"

#ifdef OPENCL
#include "arm_compute/runtime/CL/CLScheduler.h"
#endif
#include "arm_compute/runtime/Scheduler.h"

#include <iostream>
#include <memory>

using namespace arm_compute::test;
using namespace arm_compute::test::performance;

namespace arm_compute
{
namespace test
{
// Global configuration and tensor library shared by all benchmark fixtures.
PerformanceUserConfiguration user_config;
std::unique_ptr<TensorLibrary>  library;
} // namespace test
} // namespace arm_compute

/** Benchmark entry point: parses the command line, initialises the tensor
 * library and the scheduler(s), and runs all registered benchmarks.
 */
int main(int argc, char **argv)
{
    PerformanceProgramOptions options;
    try
    {
        options.parse_commandline(argc, argv);

        if(options.wants_help())
        {
            std::cout << "Usage: " << argv[0] << " [options] PATH\n";
            std::cout << options.get_help() << "\n";
            // NOTE(review): execution continues after printing the help text;
            // confirm that still running the benchmarks here is intended.
        }

        user_config = PerformanceUserConfiguration(options);
    }
    catch(const boost::program_options::required_option &err)
    {
        // A mandatory option (e.g. the assets PATH) was missing.
        std::cerr << "Error: " << err.what() << "\n";
        std::cout << "\nUsage: " << argv[0] << " [options] PATH\n";
        std::cout << options.get_help() << "\n";
        return 1;
    }

    ::benchmark::Initialize(&argc, argv);

    // Construct the tensor library with the user provided seed when one was
    // given; otherwise let the library pick its own.
    if(user_config.seed.is_set())
    {
        library = cpp14::make_unique<TensorLibrary>(user_config.path.get(), user_config.seed);
    }
    else
    {
        library = cpp14::make_unique<TensorLibrary>(user_config.path.get());
    }

#ifdef OPENCL
    arm_compute::CLScheduler::get().default_init();
#endif

    std::cout << "Using " << user_config.threads << " CPU " << (user_config.threads == 1 ? "thread" : "threads") << "\n";
    arm_compute::Scheduler::get().set_num_threads(user_config.threads);

    ::benchmark::RunSpecifiedBenchmarks();
}
#include "CL/CLAccessor.h"
#include "CL/Helper.h"
#include "Globals.h"
#include "TensorLibrary.h"
#include "benchmark/Datasets.h"
#include "benchmark/Profiler.h"
#include "benchmark/WallClockTimer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLSubTensor.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

#include "benchmark/benchmark_api.h"

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;
using namespace arm_compute::test::cl;

#include "benchmark/system_tests/common/AlexNet.h"

namespace
{
// Instantiate the generic AlexNet fixture with the OpenCL backend types.
using AlexNetSystemTest = AlexNetFixture<ICLTensor,
                                         CLTensor,
                                         CLSubTensor,
                                         CLAccessor,
                                         CLActivationLayer,
                                         CLConvolutionLayer,
                                         CLFullyConnectedLayer,
                                         CLNormalizationLayer,
                                         CLPoolingLayer,
                                         CLSoftmaxLayer>;
} // namespace

// Times one full forward pass of AlexNet on OpenCL per benchmark iteration.
BENCHMARK_DEFINE_F(AlexNetSystemTest, cl_alexnet)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Run AlexNet
        profiler.start();
        network.run();
        // Synchronise the CL command queue so the timed region includes the
        // enqueued GPU work rather than just the kernel submission.
        CLScheduler::get().sync();
        profiler.stop();
    }
}

// Single-threaded, 10 timed iterations, swept over batch sizes 1, 4 and 8.
BENCHMARK_REGISTER_F(AlexNetSystemTest, cl_alexnet)
->Threads(1)
->Iterations(10)
->ArgName("batch_size")
->Arg(1)
->Arg(4)
->Arg(8);
\ No newline at end of file diff --git a/tests/benchmark/system_tests/CL/LeNet5.cpp b/tests/benchmark/system_tests/CL/LeNet5.cpp new file mode 100644 index 0000000000..d65a7dde6c --- /dev/null +++ b/tests/benchmark/system_tests/CL/LeNet5.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#include "CL/CLAccessor.h"
#include "CL/Helper.h"
#include "Globals.h"
#include "TensorLibrary.h"
#include "benchmark/Datasets.h"
#include "benchmark/Profiler.h"
#include "benchmark/WallClockTimer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

#include "benchmark/benchmark_api.h"

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;
using namespace arm_compute::test::cl;

#include "benchmark/system_tests/common/LeNet5.h"

namespace
{
// Instantiate the generic LeNet5 fixture with the OpenCL backend types.
using LeNet5SystemTest = LeNet5Fixture<CLTensor,
                                       CLAccessor,
                                       CLActivationLayer,
                                       CLConvolutionLayer,
                                       CLFullyConnectedLayer,
                                       CLPoolingLayer,
                                       CLSoftmaxLayer>;
} // namespace

// Times one full forward pass of LeNet5 on OpenCL per benchmark iteration.
BENCHMARK_DEFINE_F(LeNet5SystemTest, cl_lenet5)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Run LeNet5
        profiler.start();
        network.run();
        // Synchronise the CL command queue so the timed region includes the
        // enqueued GPU work rather than just the kernel submission.
        CLScheduler::get().sync();
        profiler.stop();
    }
}

// Single-threaded, 10 timed iterations, swept over batch sizes 1, 16 and 32.
BENCHMARK_REGISTER_F(LeNet5SystemTest, cl_lenet5)
->Threads(1)
->Iterations(10)
->ArgName("batch_size")
->Arg(1)
->Arg(16)
->Arg(32);
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#include "Globals.h"
#include "NEON/Helper.h"
#include "NEON/NEAccessor.h"
#include "TensorLibrary.h"
#include "benchmark/Datasets.h"
#include "benchmark/Profiler.h"
#include "benchmark/WallClockTimer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"

#include "benchmark/benchmark_api.h"

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;
using namespace arm_compute::test::neon;

#include "benchmark/system_tests/common/AlexNet.h"

namespace
{
// Instantiate the generic AlexNet fixture with the NEON backend types,
// once for single-precision float (F32) ...
using AlexNetSystemTestF32 = AlexNetFixture<ITensor,
                                            Tensor,
                                            SubTensor,
                                            NEAccessor,
                                            NEActivationLayer,
                                            NEConvolutionLayer,
                                            NEFullyConnectedLayer,
                                            NENormalizationLayer,
                                            NEPoolingLayer,
                                            NESoftmaxLayer,
                                            DataType::F32>;

// ... and once for 8-bit quantized fixed point (QS8).
using AlexNetSystemTestQS8 = AlexNetFixture<ITensor,
                                            Tensor,
                                            SubTensor,
                                            NEAccessor,
                                            NEActivationLayer,
                                            NEConvolutionLayer,
                                            NEFullyConnectedLayer,
                                            NENormalizationLayer,
                                            NEPoolingLayer,
                                            NESoftmaxLayer,
                                            DataType::QS8>;
} // namespace

// F32
// Times one full forward pass of AlexNet (F32, NEON) per benchmark iteration.
// No explicit sync is needed here: NEON functions run synchronously on the CPU.
BENCHMARK_DEFINE_F(AlexNetSystemTestF32, neon_alexnet)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Run AlexNet
        profiler.start();
        network.run();
        profiler.stop();
    }
}

// Single-threaded, 10 timed iterations, swept over batch sizes 1, 4 and 8.
BENCHMARK_REGISTER_F(AlexNetSystemTestF32, neon_alexnet)
->Threads(1)
->Iterations(10)
->ArgName("batch_size")
->Arg(1)
->Arg(4)
->Arg(8);

// QS8
// Same benchmark as above but on the quantized (QS8) instantiation.
BENCHMARK_DEFINE_F(AlexNetSystemTestQS8, neon_alexnet)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Run AlexNet
        profiler.start();
        network.run();
        profiler.stop();
    }
}

// Single-threaded, 10 timed iterations, swept over batch sizes 1, 4 and 8.
BENCHMARK_REGISTER_F(AlexNetSystemTestQS8, neon_alexnet)
->Threads(1)
->Iterations(10)
->ArgName("batch_size")
->Arg(1)
->Arg(4)
->Arg(8);
\ No newline at end of file diff --git a/tests/benchmark/system_tests/NEON/LeNet5.cpp b/tests/benchmark/system_tests/NEON/LeNet5.cpp new file mode 100644 index 0000000000..5170f05a70 --- /dev/null +++ b/tests/benchmark/system_tests/NEON/LeNet5.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#include "Globals.h"
#include "NEON/Helper.h"
#include "NEON/NEAccessor.h"
#include "TensorLibrary.h"
#include "benchmark/Datasets.h"
#include "benchmark/Profiler.h"
#include "benchmark/WallClockTimer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"

#include "benchmark/benchmark_api.h"

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::benchmark;
using namespace arm_compute::test::neon;

#include "benchmark/system_tests/common/LeNet5.h"

namespace
{
// Instantiate the generic LeNet5 fixture with the NEON backend types.
using LeNet5SystemTest = LeNet5Fixture<Tensor,
                                       NEAccessor,
                                       NEActivationLayer,
                                       NEConvolutionLayer,
                                       NEFullyConnectedLayer,
                                       NEPoolingLayer,
                                       NESoftmaxLayer>;
} // namespace

// Times one full forward pass of LeNet5 (NEON) per benchmark iteration.
// No explicit sync is needed here: NEON functions run synchronously on the CPU.
BENCHMARK_DEFINE_F(LeNet5SystemTest, neon_lenet5)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Run LeNet5
        profiler.start();
        network.run();
        profiler.stop();
    }
}

// Single-threaded, 10 timed iterations, swept over batch sizes 1, 16 and 32.
BENCHMARK_REGISTER_F(LeNet5SystemTest, neon_lenet5)
->Threads(1)
->Iterations(10)
->ArgName("batch_size")
->Arg(1)
->Arg(16)
->Arg(32);
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ + +#include "TensorLibrary.h" +#include "Utils.h" + +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "model_objects/AlexNet.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template <typename ITensorType, + typename TensorType, + typename SubTensorType, + typename Accessor, + typename ActivationLayerFunction, + typename ConvolutionLayerFunction, + typename FullyConnectedLayerFunction, + typename NormalizationLayerFunction, + typename PoolingLayerFunction, + typename SoftmaxLayerFunction, + DataType dt = DataType::F32> +class AlexNetFixture : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared<WallClockTimer>()); + + const unsigned int batches = static_cast<unsigned int>(state.range(0)); + const bool weights_transposed = true; + + network.init_weights(batches, weights_transposed); + network.build(); + network.allocate(); + network.fill_random(); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + network.clear(); + } + + Profiler profiler{}; + model_objects::AlexNet<ITensorType, + TensorType, + SubTensorType, + Accessor, + ActivationLayerFunction, + ConvolutionLayerFunction, + FullyConnectedLayerFunction, + NormalizationLayerFunction, + PoolingLayerFunction, + SoftmaxLayerFunction, + dt> + network{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ diff --git a/tests/benchmark/system_tests/common/LeNet5.h b/tests/benchmark/system_tests/common/LeNet5.h new file mode 100644 index 0000000000..db34f6813a --- /dev/null +++ b/tests/benchmark/system_tests/common/LeNet5.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 
2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ + +#include "TensorLibrary.h" +#include "Utils.h" + +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "model_objects/LeNet5.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template <typename TensorType, + typename Accessor, + typename ActivationLayerFunction, + typename ConvolutionLayerFunction, + typename FullyConnectedLayerFunction, + typename PoolingLayerFunction, + typename SoftmaxLayerFunction> +class LeNet5Fixture : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared<WallClockTimer>()); + + network.build(static_cast<unsigned int>(state.range(0))); + network.fill_random(); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + network.clear(); + } + + Profiler profiler{}; + model_objects::LeNet5<TensorType, + Accessor, + ActivationLayerFunction, + ConvolutionLayerFunction, + FullyConnectedLayerFunction, + PoolingLayerFunction, + SoftmaxLayerFunction> + network{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ |