From 3b278e9261bd0de67c82f7d6c36731f118124f52 Mon Sep 17 00:00:00 2001 From: Aron Virginas-Tar Date: Fri, 12 Oct 2018 13:00:55 +0100 Subject: IVGCVSW-1938: Move backend-specific source files to the corresponding backend Change-Id: I558a9a007604afc55e536d877f8da7d0215cc9c3 --- Android.mk | 12 +- CMakeLists.txt | 33 ---- src/armnn/NeonInterceptorScheduler.cpp | 47 ------ src/armnn/NeonInterceptorScheduler.hpp | 38 ----- src/armnn/NeonTimer.cpp | 63 -------- src/armnn/NeonTimer.hpp | 43 ----- src/armnn/OpenClTimer.cpp | 105 ------------ src/armnn/OpenClTimer.hpp | 59 ------- src/armnn/test/CreateWorkloadClNeon.hpp | 108 ------------- src/armnn/test/FP16SupportTest.cpp | 115 ------------- src/armnn/test/NeonTimerTest.cpp | 103 ------------ src/armnn/test/OpenClTimerTest.cpp | 137 ---------------- src/backends/aclCommon/CMakeLists.txt | 2 + src/backends/aclCommon/common.cmake | 1 + src/backends/aclCommon/test/CMakeLists.txt | 14 ++ .../aclCommon/test/CreateWorkloadClNeon.hpp | 107 ++++++++++++ src/backends/aclCommon/test/MemCopyTests.cpp | 62 +++++++ src/backends/cl/CMakeLists.txt | 5 + src/backends/cl/OpenClTimer.cpp | 105 ++++++++++++ src/backends/cl/OpenClTimer.hpp | 59 +++++++ src/backends/cl/backend.mk | 1 + src/backends/cl/test/CMakeLists.txt | 2 + src/backends/cl/test/ClCreateWorkloadTests.cpp | 6 +- src/backends/cl/test/ClMemCopyTests.cpp | 39 +++++ src/backends/cl/test/Fp16SupportTest.cpp | 112 +++++++++++++ src/backends/cl/test/OpenClTimerTest.cpp | 143 ++++++++++++++++ src/backends/cl/workloads/ClWorkloadUtils.hpp | 4 +- src/backends/neon/CMakeLists.txt | 4 + src/backends/neon/NeonInterceptorScheduler.cpp | 47 ++++++ src/backends/neon/NeonInterceptorScheduler.hpp | 38 +++++ src/backends/neon/NeonTimer.cpp | 63 ++++++++ src/backends/neon/NeonTimer.hpp | 43 +++++ src/backends/neon/backend.mk | 2 + src/backends/neon/test/CMakeLists.txt | 2 + src/backends/neon/test/NeonCreateWorkloadTests.cpp | 5 +- src/backends/neon/test/NeonMemCopyTests.cpp | 39 +++++ src/backends/neon/test/NeonTimerTest.cpp | 105 ++++++++++++ src/backends/neon/workloads/NeonWorkloadUtils.hpp | 4 +- src/backends/test/LayerTests.cpp | 5 - src/backends/test/MemCopyTestImpl.hpp | 84 ++++++++++ src/backends/test/MemCopyTests.cpp | 180 --------------------- 41 files changed, 1096 insertions(+), 1050 deletions(-) delete mode 100644 src/armnn/NeonInterceptorScheduler.cpp delete mode 100644 src/armnn/NeonInterceptorScheduler.hpp delete mode 100644 src/armnn/NeonTimer.cpp delete mode 100644 src/armnn/NeonTimer.hpp delete mode 100644 src/armnn/OpenClTimer.cpp delete mode 100644 src/armnn/OpenClTimer.hpp delete mode 100644 src/armnn/test/CreateWorkloadClNeon.hpp delete mode 100644 src/armnn/test/FP16SupportTest.cpp delete mode 100644 src/armnn/test/NeonTimerTest.cpp delete mode 100644 src/armnn/test/OpenClTimerTest.cpp create mode 100644 src/backends/aclCommon/test/CMakeLists.txt create mode 100644 src/backends/aclCommon/test/CreateWorkloadClNeon.hpp create mode 100644 src/backends/aclCommon/test/MemCopyTests.cpp create mode 100644 src/backends/cl/OpenClTimer.cpp create mode 100644 src/backends/cl/OpenClTimer.hpp create mode 100644 src/backends/cl/test/ClMemCopyTests.cpp create mode 100644 src/backends/cl/test/Fp16SupportTest.cpp create mode 100644 src/backends/cl/test/OpenClTimerTest.cpp create mode 100644 src/backends/neon/NeonInterceptorScheduler.cpp create mode 100644 src/backends/neon/NeonInterceptorScheduler.hpp create mode 100644 src/backends/neon/NeonTimer.cpp create mode 100644 src/backends/neon/NeonTimer.hpp create mode 100644 src/backends/neon/test/NeonMemCopyTests.cpp create mode 100644 src/backends/neon/test/NeonTimerTest.cpp create mode 100644 src/backends/test/MemCopyTestImpl.hpp delete mode 100644 src/backends/test/MemCopyTests.cpp diff --git a/Android.mk b/Android.mk index 1155153e3f..87fec7b309 100644 --- a/Android.mk +++ b/Android.mk @@ -113,10 +113,7 @@ LOCAL_SRC_FILES := \ src/armnn/InternalTypes.cpp \ src/armnn/Layer.cpp \ src/armnn/LoadedNetwork.cpp \ - src/armnn/NeonInterceptorScheduler.cpp \ - src/armnn/NeonTimer.cpp \ src/armnn/Network.cpp \ - src/armnn/OpenClTimer.cpp \ src/armnn/WallClockTimer.cpp \ src/armnn/ProfilingEvent.cpp \ src/armnn/Profiling.cpp \ @@ -187,23 +184,26 @@ LOCAL_SRC_FILES := \ src/armnn/test/GraphTests.cpp \ src/armnn/test/RuntimeTests.cpp \ src/armnn/test/TensorTest.cpp \ - src/armnn/test/NeonTimerTest.cpp \ src/armnn/test/NetworkTests.cpp \ src/armnn/test/InstrumentTests.cpp \ - src/armnn/test/OpenClTimerTest.cpp \ src/armnn/test/ProfilingEventTest.cpp \ src/armnn/test/ObservableTest.cpp \ src/armnn/test/OptionalTest.cpp \ src/backends/test/WorkloadDataValidation.cpp \ src/backends/test/TensorCopyUtils.cpp \ src/backends/test/LayerTests.cpp \ - src/backends/test/MemCopyTests.cpp \ + src/backends/aclCommon/test/MemCopyTests.cpp \ src/backends/cl/test/ClCreateWorkloadTests.cpp \ src/backends/cl/test/ClLayerSupportTests.cpp \ src/backends/cl/test/ClLayerTests.cpp \ + src/backends/cl/test/ClMemCopyTests.cpp \ + src/backends/cl/test/Fp16SupportTest.cpp \ + src/backends/cl/test/OpenClTimerTest.cpp \ src/backends/neon/test/NeonCreateWorkloadTests.cpp \ src/backends/neon/test/NeonLayerSupportTests.cpp \ src/backends/neon/test/NeonLayerTests.cpp \ + src/backends/neon/test/NeonMemCopyTests.cpp \ + src/backends/neon/test/NeonTimerTest.cpp \ src/backends/reference/test/RefCreateWorkloadTests.cpp \ src/backends/reference/test/RefLayerSupportTests.cpp \ src/backends/reference/test/RefLayerTests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7325d40303..1956643fb7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -277,21 +277,6 @@ list(APPEND armnn_sources src/armnn/Observable.cpp ) -if(ARMCOMPUTENEON) - # Additionally include source files for ARM Compute NEON backend - list(APPEND armnn_sources - src/armnn/NeonInterceptorScheduler.hpp - src/armnn/NeonInterceptorScheduler.cpp - src/armnn/NeonTimer.hpp - src/armnn/NeonTimer.cpp) -endif() -if(ARMCOMPUTECL) - # Additionally include source files for ARM Compute OpenCL backend - list(APPEND armnn_sources - src/armnn/OpenClTimer.cpp - src/armnn/OpenClTimer.hpp) -endif() -# Files shared by all ARM Compute backends if(ARMCOMPUTENEON OR ARMCOMPUTECL) list(APPEND armnn_sources src/armnn/memory/IMemoryPool.hpp @@ -405,24 +390,6 @@ if(BUILD_UNIT_TESTS) src/backends/test/WorkloadTestUtils.hpp src/backends/test/QuantizeHelper.hpp) - if(ARMCOMPUTENEON) - list(APPEND unittest_sources - src/armnn/test/CreateWorkloadClNeon.hpp - src/armnn/test/NeonTimerTest.cpp) - endif() - - if(ARMCOMPUTECL) - list(APPEND unittest_sources - src/armnn/test/CreateWorkloadClNeon.hpp - src/armnn/test/OpenClTimerTest.cpp - src/armnn/test/FP16SupportTest.cpp) - endif() - - if(ARMCOMPUTENEON OR ARMCOMPUTECL) - list(APPEND unittest_sources - src/backends/test/MemCopyTests.cpp) - endif() - if(BUILD_TF_PARSER) list(APPEND unittest_sources src/armnnTfParser/test/Activations.cpp diff --git a/src/armnn/NeonInterceptorScheduler.cpp b/src/armnn/NeonInterceptorScheduler.cpp deleted file mode 100644 index 03b4670296..0000000000 --- a/src/armnn/NeonInterceptorScheduler.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonInterceptorScheduler.hpp" - -#include - -namespace armnn{ - -NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler) - : m_Kernels(nullptr), m_RealScheduler(realScheduler) -{ -} - -void NeonInterceptorScheduler::set_num_threads(unsigned int numThreads) -{ - m_RealScheduler.set_num_threads(numThreads); -} - -unsigned int NeonInterceptorScheduler::num_threads() const -{ - return m_RealScheduler.num_threads(); -} - -void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints) -{ - WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); - m_RealScheduler.schedule(kernel, hints.split_dimension()); - WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); - - const auto delta = std::chrono::duration(stopTime - startTime); - m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US); -} - -void NeonInterceptorScheduler::run_workloads(std::vector & workloads) -{ - WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); - m_RealScheduler.run_tagged_workloads(workloads, nullptr); - WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); - - const auto delta = std::chrono::duration(stopTime - startTime); - m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US); -} - -} // namespace armnn \ No newline at end of file diff --git a/src/armnn/NeonInterceptorScheduler.hpp b/src/armnn/NeonInterceptorScheduler.hpp deleted file mode 100644 index f33b79a2da..0000000000 --- a/src/armnn/NeonInterceptorScheduler.hpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "NeonTimer.hpp" -#include "WallClockTimer.hpp" - -#include -#include -#include - -namespace armnn -{ - -class NeonInterceptorScheduler : public arm_compute::IScheduler -{ -public: - NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler); - ~NeonInterceptorScheduler() = default; - - void set_num_threads(unsigned int numThreads) override; - - unsigned int num_threads() const override; - - void schedule(arm_compute::ICPPKernel *kernel, const Hints &hints) override; - - void run_workloads(std::vector &workloads) override; - - void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; } - NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; } -private: - NeonTimer::KernelMeasurements* m_Kernels; - arm_compute::IScheduler& m_RealScheduler; -}; - -} // namespace armnn diff --git a/src/armnn/NeonTimer.cpp b/src/armnn/NeonTimer.cpp deleted file mode 100644 index 219edc9680..0000000000 --- a/src/armnn/NeonTimer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonTimer.hpp" -#include "NeonInterceptorScheduler.hpp" - -#include - -#include -#include - -namespace armnn -{ -namespace -{ -static thread_local auto g_Interceptor = std::make_shared(arm_compute::Scheduler::get()); -} - -void NeonTimer::Start() -{ - m_Kernels.clear(); - BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr); - g_Interceptor->SetKernels(&m_Kernels); - - m_RealSchedulerType = arm_compute::Scheduler::get_type(); - //Note: We can't currently replace a custom scheduler - if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM) - { - // Keep the real schedule and add NeonInterceptorScheduler as an interceptor - m_RealScheduler = &arm_compute::Scheduler::get(); - arm_compute::Scheduler::set(std::static_pointer_cast(g_Interceptor)); - } -} - -void NeonTimer::Stop() -{ - // Restore real scheduler - g_Interceptor->SetKernels(nullptr); - arm_compute::Scheduler::set(m_RealSchedulerType); - m_RealScheduler = nullptr; -} - -std::vector NeonTimer::GetMeasurements() const -{ - std::vector measurements = m_Kernels; - unsigned int kernel_number = 0; - for (auto & kernel : measurements) - { - std::string kernelName = std::string(this->GetName()) + "/" + std::to_string(kernel_number++) + ": " + kernel - .m_Name; - kernel.m_Name = kernelName; - } - return measurements; -} - -const char* NeonTimer::GetName() const -{ - return "NeonKernelTimer"; -} - -} diff --git a/src/armnn/NeonTimer.hpp b/src/armnn/NeonTimer.hpp deleted file mode 100644 index 31d3e85a7c..0000000000 --- a/src/armnn/NeonTimer.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "Instrument.hpp" - -#include -#include -#include - -#include -#include -#include - -namespace armnn -{ - -class NeonTimer : public Instrument -{ -public: - using KernelMeasurements = std::vector; - - NeonTimer() = default; - ~NeonTimer() = default; - - void Start() override; - - void Stop() override; - - std::vector GetMeasurements() const override; - - const char* GetName() const override; - -private: - KernelMeasurements m_Kernels; - arm_compute::IScheduler* m_RealScheduler; - arm_compute::Scheduler::Type m_RealSchedulerType; -}; - -} \ No newline at end of file diff --git a/src/armnn/OpenClTimer.cpp b/src/armnn/OpenClTimer.cpp deleted file mode 100644 index 57552d7bd9..0000000000 --- a/src/armnn/OpenClTimer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "OpenClTimer.hpp" - -#include -#include - -namespace armnn -{ - -OpenClTimer::OpenClTimer() -{ -} - -void OpenClTimer::Start() -{ - m_Kernels.clear(); - - auto interceptor = [this]( cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *gwo, - const size_t *gws, - const size_t *lws, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) - { - cl_int retVal = 0; - - // Get the name of the kernel - cl::Kernel retainedKernel(kernel, true); - std::stringstream ss; - ss << retainedKernel.getInfo(); - - // Embed workgroup sizes into the name - if(gws != nullptr) - { - ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; - } - if(lws != nullptr) - { - ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; - } - - cl_event customEvent; - - // Forward to original OpenCl function - retVal = m_OriginalEnqueueFunction( command_queue, - kernel, - work_dim, - gwo, - gws, - lws, - num_events_in_wait_list, - event_wait_list, - &customEvent); - - // Store the Kernel info for later GetMeasurements() call - m_Kernels.emplace_back(ss.str(), customEvent); - - return retVal; - }; - - m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr; - CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; -} - -void OpenClTimer::Stop() -{ - CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction; -} - -std::vector OpenClTimer::GetMeasurements() const -{ - std::vector measurements; - - cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo(); - - int idx = 0; - for (auto& kernel : m_Kernels) - { - std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name; - - double timeUs = 0.0; - if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0) - { - // Wait for the event to finish before accessing profile results. - kernel.m_Event.wait(); - - cl_ulong start = kernel.m_Event.getProfilingInfo(); - cl_ulong end = kernel.m_Event.getProfilingInfo(); - timeUs = static_cast(end - start) / 1000.0; - } - - measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US); - } - - return measurements; -} - -} //namespace armnn diff --git a/src/armnn/OpenClTimer.hpp b/src/armnn/OpenClTimer.hpp deleted file mode 100644 index ca044a405e..0000000000 --- a/src/armnn/OpenClTimer.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "Instrument.hpp" - -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/core/CL/OpenCL.h" - -#include -#include - -namespace armnn -{ - -/// OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). -class OpenClTimer : public Instrument -{ -public: - OpenClTimer(); - ~OpenClTimer() = default; - - /// Start the OpenCl timer - void Start() override; - - /// Stop the OpenCl timer - void Stop() override; - - /// Get the name of the timer - /// \return Name of the timer - const char* GetName() const override { return "OpenClKernelTimer"; } - - /// Get the recorded measurements. This will be a list of the execution durations for all the OpenCl kernels. - /// \return Recorded measurements - std::vector GetMeasurements() const override; - -private: - using CLScheduler = arm_compute::CLScheduler; - using CLSymbols = arm_compute::CLSymbols; - using ClEvent = cl::Event; - using ClEnqueueFunc = decltype(CLSymbols::clEnqueueNDRangeKernel_ptr); - - /// Stores info about the OpenCl kernel - struct KernelInfo - { - KernelInfo(const std::string& name, cl_event& event) : m_Name(name), m_Event(event) {} - - std::string m_Name; - ClEvent m_Event; - }; - - std::list m_Kernels; ///< List of all kernels executed - ClEnqueueFunc m_OriginalEnqueueFunction; ///< Keep track of original OpenCl function -}; - -} //namespace armnn \ No newline at end of file diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp deleted file mode 100644 index 56de085f8e..0000000000 --- a/src/armnn/test/CreateWorkloadClNeon.hpp +++ /dev/null @@ -1,108 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "CreateWorkload.hpp" - -#include -#include - -#if ARMCOMPUTECL_ENABLED -#include -#endif - -#if ARMCOMPUTENEON_ENABLED -#include -#endif - - -using namespace armnn; - -namespace -{ - -using namespace std; - -template -boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle, - std::initializer_list expectedDimensions) -{ - arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info(); - - auto infoNumDims = info->num_dimensions(); - auto numExpectedDims = expectedDimensions.size(); - if (infoNumDims != numExpectedDims) - { - boost::test_tools::predicate_result res(false); - res.message() << "Different number of dimensions [" << info->num_dimensions() - << "!=" << expectedDimensions.size() << "]"; - return res; - } - - size_t i = info->num_dimensions() - 1; - - for (unsigned int expectedDimension : expectedDimensions) - { - if (info->dimension(i) != expectedDimension) - { - boost::test_tools::predicate_result res(false); - res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]"; - return res; - } - - i--; - } - - return true; -} - -template -void CreateMemCopyWorkloads(IWorkloadFactory& factory) -{ - Graph graph; - RefWorkloadFactory refFactory; - - // Creates the layers we're testing. - Layer* const layer1 = graph.AddLayer("layer1"); - Layer* const layer2 = graph.AddLayer("layer2"); - - // Creates extra layers. - Layer* const input = graph.AddLayer(0, "input"); - Layer* const output = graph.AddLayer(0, "output"); - - // Connects up. - TensorInfo tensorInfo({2, 3}, DataType::Float32); - Connect(input, layer1, tensorInfo); - Connect(layer1, layer2, tensorInfo); - Connect(layer2, output, tensorInfo); - - input->CreateTensorHandles(graph, refFactory); - layer1->CreateTensorHandles(graph, factory); - layer2->CreateTensorHandles(graph, refFactory); - output->CreateTensorHandles(graph, refFactory); - - // make the workloads and check them - auto workload1 = MakeAndCheckWorkload(*layer1, graph, factory); - auto workload2 = MakeAndCheckWorkload(*layer2, graph, refFactory); - - MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData(); - BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1); - BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1); - auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Inputs[0]); - auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Outputs[0]); - BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); - BOOST_TEST(CompareTensorHandleShape(outputHandle1, {2, 3})); - - - MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData(); - BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1); - BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1); - auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Inputs[0]); - auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Outputs[0]); - BOOST_TEST(CompareTensorHandleShape(inputHandle2, {2, 3})); - BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); -} - -} //namespace \ No newline at end of file diff --git a/src/armnn/test/FP16SupportTest.cpp b/src/armnn/test/FP16SupportTest.cpp deleted file mode 100644 index 2706d1f363..0000000000 --- a/src/armnn/test/FP16SupportTest.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include - -using namespace armnn; - -BOOST_AUTO_TEST_SUITE(Fp16Support) - -BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport) -{ - Graph graph; - - Layer* const inputLayer1 = graph.AddLayer(1, "input1"); - Layer* const inputLayer2 = graph.AddLayer(2, "input2"); - - Layer* const additionLayer = graph.AddLayer("addition"); - Layer* const outputLayer = graph.AddLayer(0, "output"); - - TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); - inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); - inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); - additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo); - - BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); - BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); - BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); - -} - -BOOST_AUTO_TEST_CASE(Fp16AdditionTest) -{ - using namespace half_float::literal; - // Create runtime in which test will run - IRuntime::CreationOptions options; - IRuntimePtr runtime(IRuntime::Create(options)); - - // Builds up the structure of the network. - INetworkPtr net(INetwork::Create()); - - - IConnectableLayer* inputLayer1 = net->AddInputLayer(0); - IConnectableLayer* inputLayer2 = net->AddInputLayer(1); - IConnectableLayer* additionLayer = net->AddAdditionLayer(); - IConnectableLayer* outputLayer = net->AddOutputLayer(0); - - inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); - inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); - additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - //change to float16 - TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16); - inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); - inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); - additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); - - // optimize the network - std::vector backends = {Compute::GpuAcc}; - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - - // Loads it into the runtime. - - NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); - - std::vector input1Data - { - 1.0_h, 2.0_h, 3.0_h, 4.0_h - }; - - std::vector input2Data - { - 100.0_h, 200.0_h, 300.0_h, 400.0_h - }; - - InputTensors inputTensors - { - {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, - {1,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} - }; - - std::vector outputData(input1Data.size()); - OutputTensors outputTensors - { - {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} - }; - - // Does the inference. - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // Checks the results. - BOOST_TEST(outputData == std::vector({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/NeonTimerTest.cpp b/src/armnn/test/NeonTimerTest.cpp deleted file mode 100644 index 6d0429c8b9..0000000000 --- a/src/armnn/test/NeonTimerTest.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include -#include "TensorHelpers.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace armnn; - -BOOST_AUTO_TEST_SUITE(NeonTimerInstrument) - - -BOOST_AUTO_TEST_CASE(NeonTimerGetName) -{ - NeonTimer neonTimer; - BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer"); -} - -BOOST_AUTO_TEST_CASE(NeonTimerMeasure) -{ - NeonWorkloadFactory workloadFactory; - - unsigned int inputWidth = 4000u; - unsigned int inputHeight = 5000u; - unsigned int inputChannels = 1u; - unsigned int inputBatchSize = 1u; - - float upperBound = 1.0f; - float lowerBound = -1.0f; - - size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize; - std::vector inputData(inputSize, 0.f); - std::generate(inputData.begin(), inputData.end(), [](){ - return (static_cast(rand()) / static_cast(RAND_MAX / 3)) + 1.f; }); - - unsigned int outputWidth = inputWidth; - unsigned int outputHeight = inputHeight; - unsigned int outputChannels = inputChannels; - unsigned int outputBatchSize = inputBatchSize; - - armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, - armnn::GetDataType()); - - armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, - armnn::GetDataType()); - - LayerTestResult result(inputTensorInfo); - - auto input = MakeTensor(inputTensorInfo, inputData); - - std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - // Setup bounded ReLu - armnn::ActivationQueueDescriptor descriptor; - armnn::WorkloadInfo workloadInfo; - AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); - - descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; - descriptor.m_Parameters.m_A = upperBound; - descriptor.m_Parameters.m_B = lowerBound; - - std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - NeonTimer neonTimer; - // Start the timer. - neonTimer.Start(); - // Execute the workload. - workload->Execute(); - // Stop the timer. - neonTimer.Stop(); - - std::vector measurements = neonTimer.GetMeasurements(); - - BOOST_CHECK_EQUAL(measurements.size(), 2); - BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel"); - BOOST_CHECK(measurements[0].m_Value > 0.0); - BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel"); - BOOST_CHECK(measurements[1].m_Value > 0.0); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/OpenClTimerTest.cpp b/src/armnn/test/OpenClTimerTest.cpp deleted file mode 100644 index 76cffec4f3..0000000000 --- a/src/armnn/test/OpenClTimerTest.cpp +++ /dev/null @@ -1,137 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7 - -#include -#include -#include -#include -#include -#include -#include -#include -#include "TensorHelpers.hpp" -#include -#include -#include - -using namespace armnn; - -struct OpenClFixture -{ - // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case. - // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution - // times from OpenClTimer. - OpenClFixture() : m_ClContextControl(nullptr, true) {} - ~OpenClFixture() {} - - ClContextControl m_ClContextControl; -}; - -BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture) -using FactoryType = ClWorkloadFactory; - -BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm) -{ - ClWorkloadFactory workloadFactory; - - const unsigned int width = 2; - const unsigned int height = 3; - const unsigned int channels = 2; - const unsigned int num = 1; - int32_t qOffset = 0; - float qScale = 0.f; - - TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType()); - TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType()); - TensorInfo tensorInfo({channels}, GetDataType()); - - // Set quantization parameters if the requested type is a quantized type. - if(IsQuantizedType()) - { - inputTensorInfo.SetQuantizationScale(qScale); - inputTensorInfo.SetQuantizationOffset(qOffset); - outputTensorInfo.SetQuantizationScale(qScale); - outputTensorInfo.SetQuantizationOffset(qOffset); - tensorInfo.SetQuantizationScale(qScale); - tensorInfo.SetQuantizationOffset(qOffset); - } - - auto input = MakeTensor(inputTensorInfo, - QuantizedVector(qScale, qOffset, - { - 1.f, 4.f, - 4.f, 2.f, - 1.f, 6.f, - - 1.f, 1.f, - 4.f, 1.f, - -2.f, 4.f - })); - // these values are per-channel of the input - auto mean = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, -2})); - auto variance = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {4, 9})); - auto beta = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, 2})); - auto gamma = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {2, 1})); - - std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - - BatchNormalizationQueueDescriptor data; - WorkloadInfo info; - ScopedCpuTensorHandle meanTensor(tensorInfo); - ScopedCpuTensorHandle varianceTensor(tensorInfo); - ScopedCpuTensorHandle betaTensor(tensorInfo); - ScopedCpuTensorHandle gammaTensor(tensorInfo); - - AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); - AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); - AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); - AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); - - AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Mean = &meanTensor; - data.m_Variance = &varianceTensor; - data.m_Beta = &betaTensor; - data.m_Gamma = &gammaTensor; - data.m_Parameters.m_Eps = 0.0f; - - // for each channel: - // substract mean, divide by standard deviation (with an epsilon to avoid div by 0) - // multiply by gamma and add beta - std::unique_ptr workload = workloadFactory.CreateBatchNormalization(data, info); - - inputHandle->Allocate(); - outputHandle->Allocate(); - - CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); - - OpenClTimer openClTimer; - - BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer"); - - //Start the timer - openClTimer.Start(); - - //Execute the workload - workload->Execute(); - - //Stop the timer - openClTimer.Stop(); - - BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1); - - BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name, - "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]"); - - BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0); - -} - -BOOST_AUTO_TEST_SUITE_END() - -#endif //aarch64 or x86_64 \ No newline at end of file diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt index 42f914263a..6f99f4f146 100644 --- a/src/backends/aclCommon/CMakeLists.txt +++ b/src/backends/aclCommon/CMakeLists.txt @@ -9,6 +9,8 @@ list(APPEND armnnAclCommon_sources ArmComputeUtils.hpp ) +add_subdirectory(test) + add_library(armnnAclCommon STATIC ${armnnAclCommon_sources}) target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src) target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) diff --git a/src/backends/aclCommon/common.cmake b/src/backends/aclCommon/common.cmake index d9d035f307..89be236a7f 100644 --- a/src/backends/aclCommon/common.cmake +++ b/src/backends/aclCommon/common.cmake @@ -6,4 +6,5 @@ if(ARMCOMPUTENEON OR ARMCOMPUTECL) add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/aclCommon) list(APPEND armnnLibraries armnnAclCommon) + list(APPEND armnnUnitTestLibraries armnnAclCommonUnitTests) endif() diff --git a/src/backends/aclCommon/test/CMakeLists.txt b/src/backends/aclCommon/test/CMakeLists.txt new file mode 100644 index 0000000000..98008edeb5 --- /dev/null +++ b/src/backends/aclCommon/test/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnAclCommonUnitTests_sources + CreateWorkloadClNeon.hpp + MemCopyTests.cpp +) + +add_library(armnnAclCommonUnitTests OBJECT ${armnnAclCommonUnitTests_sources}) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) \ No newline at end of file diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp new file mode 100644 index 0000000000..a79cfe6b0d --- /dev/null +++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp @@ -0,0 +1,107 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include +#include + +#if ARMCOMPUTECL_ENABLED +#include +#endif + +#if ARMCOMPUTENEON_ENABLED +#include +#endif + +using namespace armnn; + +namespace +{ + +using namespace std; + +template +boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle, + std::initializer_list expectedDimensions) +{ + arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info(); + + auto infoNumDims = info->num_dimensions(); + auto numExpectedDims = expectedDimensions.size(); + if (infoNumDims != numExpectedDims) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different number of dimensions [" << info->num_dimensions() + << "!=" << expectedDimensions.size() << "]"; + return res; + } + + size_t i = info->num_dimensions() - 1; + + for (unsigned int expectedDimension : expectedDimensions) + { + if (info->dimension(i) != expectedDimension) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]"; + return res; + } + + i--; + } + + return true; +} + +template +void CreateMemCopyWorkloads(IWorkloadFactory& factory) +{ + Graph graph; + RefWorkloadFactory refFactory; + + // Creates the layers we're testing. + Layer* const layer1 = graph.AddLayer("layer1"); + Layer* const layer2 = graph.AddLayer("layer2"); + + // Creates extra layers. + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // Connects up. + TensorInfo tensorInfo({2, 3}, DataType::Float32); + Connect(input, layer1, tensorInfo); + Connect(layer1, layer2, tensorInfo); + Connect(layer2, output, tensorInfo); + + input->CreateTensorHandles(graph, refFactory); + layer1->CreateTensorHandles(graph, factory); + layer2->CreateTensorHandles(graph, refFactory); + output->CreateTensorHandles(graph, refFactory); + + // make the workloads and check them + auto workload1 = MakeAndCheckWorkload(*layer1, graph, factory); + auto workload2 = MakeAndCheckWorkload(*layer2, graph, refFactory); + + MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData(); + BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1); + auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Inputs[0]); + auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Outputs[0]); + BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(CompareTensorHandleShape(outputHandle1, {2, 3})); + + + MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData(); + BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1); + auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Inputs[0]); + auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Outputs[0]); + BOOST_TEST(CompareTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); +} + +} //namespace \ No newline at end of file diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp new file mode 100644 index 0000000000..8ecdb1014d --- /dev/null +++ b/src/backends/aclCommon/test/MemCopyTests.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include + +#include + +BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) + +BOOST_AUTO_TEST_CASE(AclTypeConversions) +{ + arm_compute::Strides strides(1, 2, 3, 4); + armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); + + BOOST_TEST(convertedStrides[0] == 4); + BOOST_TEST(convertedStrides[1] == 3); + BOOST_TEST(convertedStrides[2] == 2); + BOOST_TEST(convertedStrides[3] == 1); + + arm_compute::TensorShape shape(5, 6, 7, 8); + armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); + + BOOST_TEST(convertedshape[0] == 8); + BOOST_TEST(convertedshape[1] == 7); + BOOST_TEST(convertedshape[2] == 6); + BOOST_TEST(convertedshape[3] == 5); +} + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt index 04da6ddcff..2f32081dfe 100644 --- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -15,6 +15,11 @@ list(APPEND armnnClBackend_sources ) if(ARMCOMPUTECL) + list(APPEND armnnClBackend_sources + OpenClTimer.cpp + OpenClTimer.hpp + ) + add_subdirectory(workloads) add_subdirectory(test) endif() diff --git a/src/backends/cl/OpenClTimer.cpp b/src/backends/cl/OpenClTimer.cpp new file mode 100644 index 0000000000..57552d7bd9 --- /dev/null +++ b/src/backends/cl/OpenClTimer.cpp @@ -0,0 +1,105 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "OpenClTimer.hpp" + +#include +#include + +namespace armnn +{ + +OpenClTimer::OpenClTimer() +{ +} + +void OpenClTimer::Start() +{ + m_Kernels.clear(); + + auto interceptor = [this]( cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t *gwo, + const size_t *gws, + const size_t *lws, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) + { + cl_int retVal = 0; + + // Get the name of the kernel + cl::Kernel retainedKernel(kernel, true); + std::stringstream ss; + ss << retainedKernel.getInfo(); + + // Embed workgroup sizes into the name + if(gws != nullptr) + { + ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; + } + if(lws != nullptr) + { + ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; + } + + cl_event customEvent; + + // Forward to original OpenCl function + retVal = m_OriginalEnqueueFunction( command_queue, + kernel, + work_dim, + gwo, + gws, + lws, + num_events_in_wait_list, + event_wait_list, + &customEvent); + + // Store the Kernel info for later GetMeasurements() call + m_Kernels.emplace_back(ss.str(), customEvent); + + return retVal; + }; + + m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr; + CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; +} + +void OpenClTimer::Stop() +{ + CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction; +} + +std::vector OpenClTimer::GetMeasurements() const +{ + std::vector measurements; + + cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo(); + + int idx = 0; + for (auto& kernel : m_Kernels) + { + std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name; + + double timeUs = 0.0; + if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0) + { + // Wait for the event to finish before accessing profile results. + kernel.m_Event.wait(); + + cl_ulong start = kernel.m_Event.getProfilingInfo(); + cl_ulong end = kernel.m_Event.getProfilingInfo(); + timeUs = static_cast(end - start) / 1000.0; + } + + measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US); + } + + return measurements; +} + +} //namespace armnn diff --git a/src/backends/cl/OpenClTimer.hpp b/src/backends/cl/OpenClTimer.hpp new file mode 100644 index 0000000000..a7ae1387d9 --- /dev/null +++ b/src/backends/cl/OpenClTimer.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Instrument.hpp" + +#include +#include + +#include +#include + +namespace armnn +{ + +/// OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). +class OpenClTimer : public Instrument +{ +public: + OpenClTimer(); + ~OpenClTimer() = default; + + /// Start the OpenCl timer + void Start() override; + + /// Stop the OpenCl timer + void Stop() override; + + /// Get the name of the timer + /// \return Name of the timer + const char* GetName() const override { return "OpenClKernelTimer"; } + + /// Get the recorded measurements. This will be a list of the execution durations for all the OpenCl kernels. + /// \return Recorded measurements + std::vector GetMeasurements() const override; + +private: + using CLScheduler = arm_compute::CLScheduler; + using CLSymbols = arm_compute::CLSymbols; + using ClEvent = cl::Event; + using ClEnqueueFunc = decltype(CLSymbols::clEnqueueNDRangeKernel_ptr); + + /// Stores info about the OpenCl kernel + struct KernelInfo + { + KernelInfo(const std::string& name, cl_event& event) : m_Name(name), m_Event(event) {} + + std::string m_Name; + ClEvent m_Event; + }; + + std::list m_Kernels; ///< List of all kernels executed + ClEnqueueFunc m_OriginalEnqueueFunction; ///< Keep track of original OpenCl function +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 4375d9496c..205f7b5415 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -12,6 +12,7 @@ BACKEND_SOURCES := \ ClContextControl.cpp \ ClLayerSupport.cpp \ ClWorkloadFactory.cpp \ + OpenClTimer.cpp \ workloads/ClActivationWorkload.cpp \ workloads/ClAdditionWorkload.cpp \ workloads/ClBatchNormalizationFloatWorkload.cpp \ diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index d365290a6c..4936a78645 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -8,6 +8,8 @@ list(APPEND armnnClBackendUnitTests_sources ClCreateWorkloadTests.cpp ClLayerSupportTests.cpp ClLayerTests.cpp + ClMemCopyTests.cpp + OpenClTimerTest.cpp ) add_library(armnnClBackendUnitTests OBJECT ${armnnClBackendUnitTests_sources}) diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 66c2c2aa40..526dc68fc5 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -6,13 +6,15 @@ #include "ClContextControlFixture.hpp" #include + +#include + #include #include #include #include -#include -#include +#include boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, std::initializer_list expectedDimensions) diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp new file mode 100644 index 0000000000..af8a36d6c0 --- /dev/null +++ b/src/backends/cl/test/ClMemCopyTests.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(ClMemCopy) + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp new file mode 100644 index 0000000000..90bef3647b --- /dev/null +++ b/src/backends/cl/test/Fp16SupportTest.cpp @@ -0,0 +1,112 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Fp16Support) + +BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport) +{ + Graph graph; + + Layer* const inputLayer1 = graph.AddLayer(1, "input1"); + Layer* const inputLayer2 = graph.AddLayer(2, "input2"); + + Layer* const additionLayer = graph.AddLayer("addition"); + Layer* const outputLayer = graph.AddLayer(0, "output"); + + TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + + BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); +} + +BOOST_AUTO_TEST_CASE(Fp16AdditionTest) +{ + using namespace half_float::literal; + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* inputLayer1 = net->AddInputLayer(0); + IConnectableLayer* inputLayer2 = net->AddInputLayer(1); + IConnectableLayer* additionLayer = net->AddAdditionLayer(); + IConnectableLayer* outputLayer = net->AddOutputLayer(0); + + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + //change to float16 + TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16); + inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + + // optimize the network + std::vector backends = {Compute::GpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + std::vector input1Data + { + 1.0_h, 2.0_h, 3.0_h, 4.0_h + }; + + std::vector input2Data + { + 100.0_h, 200.0_h, 300.0_h, 400.0_h + }; + + InputTensors inputTensors + { + {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, + {1,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} + }; + + std::vector outputData(input1Data.size()); + OutputTensors outputTensors + { + {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + BOOST_TEST(outputData == std::vector({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp new file mode 100644 index 0000000000..70ceac2a3f --- /dev/null +++ b/src/backends/cl/test/OpenClTimerTest.cpp @@ -0,0 +1,143 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7 + +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include + +using namespace armnn; + +struct OpenClFixture +{ + // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case. + // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution + // times from OpenClTimer. + OpenClFixture() : m_ClContextControl(nullptr, true) {} + ~OpenClFixture() {} + + ClContextControl m_ClContextControl; +}; + +BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture) +using FactoryType = ClWorkloadFactory; + +BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm) +{ + ClWorkloadFactory workloadFactory; + + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + int32_t qOffset = 0; + float qScale = 0.f; + + TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType()); + TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType()); + TensorInfo tensorInfo({channels}, GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + tensorInfo.SetQuantizationScale(qScale); + tensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, + { + 1.f, 4.f, + 4.f, 2.f, + 1.f, 6.f, + + 1.f, 1.f, + 4.f, 1.f, + -2.f, 4.f + })); + // these values are per-channel of the input + auto mean = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, -2})); + auto variance = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {4, 9})); + auto beta = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, 2})); + auto gamma = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {2, 1})); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + BatchNormalizationQueueDescriptor data; + WorkloadInfo info; + ScopedCpuTensorHandle meanTensor(tensorInfo); + ScopedCpuTensorHandle varianceTensor(tensorInfo); + ScopedCpuTensorHandle betaTensor(tensorInfo); + ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.0f; + + // for each channel: + // substract mean, divide by standard deviation (with an epsilon to avoid div by 0) + // multiply by gamma and add beta + std::unique_ptr workload = workloadFactory.CreateBatchNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + OpenClTimer openClTimer; + + BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer"); + + //Start the timer + openClTimer.Start(); + + //Execute the workload + workload->Execute(); + + //Stop the timer + openClTimer.Stop(); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name, + "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]"); + + BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0); + +} + +BOOST_AUTO_TEST_SUITE_END() + +#endif //aarch64 or x86_64 \ No newline at end of file diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp index af4ccd0bb8..c765c63dce 100644 --- a/src/backends/cl/workloads/ClWorkloadUtils.hpp +++ b/src/backends/cl/workloads/ClWorkloadUtils.hpp @@ -5,11 +5,11 @@ #pragma once #include + #include +#include #include -#include "OpenClTimer.hpp" - #define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ name, \ diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt index 93c7955a5f..152955aa06 100644 --- a/src/backends/neon/CMakeLists.txt +++ b/src/backends/neon/CMakeLists.txt @@ -7,11 +7,15 @@ if(ARMCOMPUTENEON) list(APPEND armnnNeonBackend_sources NeonBackend.cpp NeonBackend.hpp + NeonInterceptorScheduler.hpp + NeonInterceptorScheduler.cpp NeonLayerSupport.cpp NeonLayerSupport.hpp NeonWorkloadFactory.cpp NeonWorkloadFactory.hpp NeonTensorHandle.hpp + NeonTimer.hpp + NeonTimer.cpp ) add_subdirectory(workloads) diff --git a/src/backends/neon/NeonInterceptorScheduler.cpp b/src/backends/neon/NeonInterceptorScheduler.cpp new file mode 100644 index 0000000000..03b4670296 --- /dev/null +++ b/src/backends/neon/NeonInterceptorScheduler.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonInterceptorScheduler.hpp" + +#include + +namespace armnn{ + +NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler) + : m_Kernels(nullptr), m_RealScheduler(realScheduler) +{ +} + +void NeonInterceptorScheduler::set_num_threads(unsigned int numThreads) +{ + m_RealScheduler.set_num_threads(numThreads); +} + +unsigned int NeonInterceptorScheduler::num_threads() const +{ + return m_RealScheduler.num_threads(); +} + +void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints) +{ + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); + m_RealScheduler.schedule(kernel, hints.split_dimension()); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); + + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US); +} + +void NeonInterceptorScheduler::run_workloads(std::vector & workloads) +{ + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); + m_RealScheduler.run_tagged_workloads(workloads, nullptr); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); + + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US); +} + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/neon/NeonInterceptorScheduler.hpp b/src/backends/neon/NeonInterceptorScheduler.hpp new file mode 100644 index 0000000000..f33b79a2da --- /dev/null +++ b/src/backends/neon/NeonInterceptorScheduler.hpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "NeonTimer.hpp" +#include "WallClockTimer.hpp" + +#include +#include +#include + +namespace armnn +{ + +class NeonInterceptorScheduler : public arm_compute::IScheduler +{ +public: + NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler); + ~NeonInterceptorScheduler() = default; + + void set_num_threads(unsigned int numThreads) override; + + unsigned int num_threads() const override; + + void schedule(arm_compute::ICPPKernel *kernel, const Hints &hints) override; + + void run_workloads(std::vector &workloads) override; + + void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; } + NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; } +private: + NeonTimer::KernelMeasurements* m_Kernels; + arm_compute::IScheduler& m_RealScheduler; +}; + +} // namespace armnn diff --git a/src/backends/neon/NeonTimer.cpp b/src/backends/neon/NeonTimer.cpp new file mode 100644 index 0000000000..219edc9680 --- /dev/null +++ b/src/backends/neon/NeonTimer.cpp @@ -0,0 +1,63 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonTimer.hpp" +#include "NeonInterceptorScheduler.hpp" + +#include + +#include +#include + +namespace armnn +{ +namespace +{ +static thread_local auto g_Interceptor = std::make_shared(arm_compute::Scheduler::get()); +} + +void NeonTimer::Start() +{ + m_Kernels.clear(); + BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr); + g_Interceptor->SetKernels(&m_Kernels); + + m_RealSchedulerType = arm_compute::Scheduler::get_type(); + //Note: We can't currently replace a custom scheduler + if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM) + { + // Keep the real schedule and add NeonInterceptorScheduler as an interceptor + m_RealScheduler = &arm_compute::Scheduler::get(); + arm_compute::Scheduler::set(std::static_pointer_cast(g_Interceptor)); + } +} + +void NeonTimer::Stop() +{ + // Restore real scheduler + g_Interceptor->SetKernels(nullptr); + arm_compute::Scheduler::set(m_RealSchedulerType); + m_RealScheduler = nullptr; +} + +std::vector NeonTimer::GetMeasurements() const +{ + std::vector measurements = m_Kernels; + unsigned int kernel_number = 0; + for (auto & kernel : measurements) + { + std::string kernelName = std::string(this->GetName()) + "/" + std::to_string(kernel_number++) + ": " + kernel + .m_Name; + kernel.m_Name = kernelName; + } + return measurements; +} + +const char* NeonTimer::GetName() const +{ + return "NeonKernelTimer"; +} + +} diff --git a/src/backends/neon/NeonTimer.hpp b/src/backends/neon/NeonTimer.hpp new file mode 100644 index 0000000000..31d3e85a7c --- /dev/null +++ b/src/backends/neon/NeonTimer.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Instrument.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace armnn +{ + +class NeonTimer : public Instrument +{ +public: + using KernelMeasurements = std::vector; + + NeonTimer() = default; + ~NeonTimer() = default; + + void Start() override; + + void Stop() override; + + std::vector GetMeasurements() const override; + + const char* GetName() const override; + +private: + KernelMeasurements m_Kernels; + arm_compute::IScheduler* m_RealScheduler; + arm_compute::Scheduler::Type m_RealSchedulerType; +}; + +} \ No newline at end of file diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 132328ba72..af83fb1321 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -9,7 +9,9 @@ BACKEND_SOURCES := \ NeonBackend.cpp \ + NeonInterceptorScheduler.cpp \ NeonLayerSupport.cpp \ + NeonTimer.cpp \ NeonWorkloadFactory.cpp \ workloads/NeonActivationWorkload.cpp \ workloads/NeonAdditionFloatWorkload.cpp \ diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt index 87da01e9e2..4a3380c3f9 100644 --- a/src/backends/neon/test/CMakeLists.txt +++ b/src/backends/neon/test/CMakeLists.txt @@ -7,6 +7,8 @@ list(APPEND armnnNeonBackendUnitTests_sources NeonCreateWorkloadTests.cpp NeonLayerSupportTests.cpp NeonLayerTests.cpp + NeonMemCopyTests.cpp + NeonTimerTest.cpp ) add_library(armnnNeonBackendUnitTests OBJECT ${armnnNeonBackendUnitTests_sources}) diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp index 2c4d0ae0f9..ec8fe803a1 100644 --- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp +++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp @@ -3,9 +3,10 @@ // SPDX-License-Identifier: MIT // -#include - #include + +#include + #include #include #include diff --git a/src/backends/neon/test/NeonMemCopyTests.cpp b/src/backends/neon/test/NeonMemCopyTests.cpp new file mode 100644 index 0000000000..ddb47343a1 --- /dev/null +++ b/src/backends/neon/test/NeonMemCopyTests.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(NeonMemCopy) + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/test/NeonTimerTest.cpp b/src/backends/neon/test/NeonTimerTest.cpp new file mode 100644 index 0000000000..06f19c6ec3 --- /dev/null +++ b/src/backends/neon/test/NeonTimerTest.cpp @@ -0,0 +1,105 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(NeonTimerInstrument) + + +BOOST_AUTO_TEST_CASE(NeonTimerGetName) +{ + NeonTimer neonTimer; + BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer"); +} + +BOOST_AUTO_TEST_CASE(NeonTimerMeasure) +{ + NeonWorkloadFactory workloadFactory; + + unsigned int inputWidth = 4000u; + unsigned int inputHeight = 5000u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1u; + + float upperBound = 1.0f; + float lowerBound = -1.0f; + + size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize; + std::vector inputData(inputSize, 0.f); + std::generate(inputData.begin(), inputData.end(), [](){ + return (static_cast(rand()) / static_cast(RAND_MAX / 3)) + 1.f; }); + + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + LayerTestResult result(inputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, inputData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + NeonTimer neonTimer; + // Start the timer. + neonTimer.Start(); + // Execute the workload. + workload->Execute(); + // Stop the timer. + neonTimer.Stop(); + + std::vector measurements = neonTimer.GetMeasurements(); + + BOOST_CHECK_EQUAL(measurements.size(), 2); + BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel"); + BOOST_CHECK(measurements[0].m_Value > 0.0); + BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel"); + BOOST_CHECK(measurements[1].m_Value > 0.0); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.hpp b/src/backends/neon/workloads/NeonWorkloadUtils.hpp index 22668f6f4b..c4accd6c89 100644 --- a/src/backends/neon/workloads/NeonWorkloadUtils.hpp +++ b/src/backends/neon/workloads/NeonWorkloadUtils.hpp @@ -5,9 +5,9 @@ #pragma once #include -#include -#include "NeonTimer.hpp" +#include +#include #include #include diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp index 4b50e4b5f9..d955e42c36 100755 --- a/src/backends/test/LayerTests.cpp +++ b/src/backends/test/LayerTests.cpp @@ -16,11 +16,6 @@ #include #include -#ifdef ARMCOMPUTECL_ENABLED -#include -#include -#endif - #include #include diff --git a/src/backends/test/MemCopyTestImpl.hpp b/src/backends/test/MemCopyTestImpl.hpp new file mode 100644 index 0000000000..dab7f47915 --- /dev/null +++ b/src/backends/test/MemCopyTestImpl.hpp @@ -0,0 +1,84 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "LayerTests.hpp" +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +#include + +#include + +namespace +{ + +LayerTestResult MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, + armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) +{ + const std::array shapeData = { { 1u, 1u, 6u, 5u } }; + const armnn::TensorShape tensorShape(4, shapeData.data()); + const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); + boost::multi_array inputData = MakeTensor(tensorInfo, std::vector( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + ); + + LayerTestResult ret(tensorInfo); + ret.outputExpected = inputData; + + boost::multi_array outputData(shapeData); + + auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); + auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); + + AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); + outputTensorHandle->Allocate(); + + armnn::MemCopyQueueDescriptor memCopyQueueDesc; + armnn::WorkloadInfo workloadInfo; + + const unsigned int origin[4] = {}; + + auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) + ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) + : std::move(inputTensorHandle); + auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) + ? dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) + : std::move(outputTensorHandle); + + AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); + AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); + + dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); + + CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); + ret.output = outputData; + + return ret; +} + +template +LayerTestResult MemCopyTest(bool withSubtensors) +{ + SrcWorkloadFactory srcWorkloadFactory; + DstWorkloadFactory dstWorkloadFactory; + + return MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); +} + +} // anonymous namespace diff --git a/src/backends/test/MemCopyTests.cpp b/src/backends/test/MemCopyTests.cpp deleted file mode 100644 index f66caffd92..0000000000 --- a/src/backends/test/MemCopyTests.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include -#include - -#include -#include -#if ARMCOMPUTECL_ENABLED -#include -#endif -#if ARMCOMPUTENEON_ENABLED -#include -#endif -#include -#include - -#include "TensorCopyUtils.hpp" -#include "WorkloadTestUtils.hpp" - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED -#include -#endif - -BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) - -void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, - bool withSubtensors) -{ - const std::array shapeData = { { 1u, 1u, 6u, 5u } }; - const armnn::TensorShape tensorShape(4, shapeData.data()); - const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); - boost::multi_array inputData = MakeTensor(tensorInfo, std::vector( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - }) - ); - - boost::multi_array outputData(shapeData); - - auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); - auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); - - AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); - outputTensorHandle->Allocate(); - - armnn::MemCopyQueueDescriptor memCopyQueueDesc; - armnn::WorkloadInfo workloadInfo; - - const unsigned int origin[4] = {}; - - auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) - ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) - : std::move(inputTensorHandle); - auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) - ? dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) - : std::move(outputTensorHandle); - - AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); - AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); - - dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); - - CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); - - BOOST_TEST(CompareTensors(inputData, outputData)); -} - -template -void MemCopyTest(bool withSubtensors) -{ - SrcWorkloadFactory srcWorkloadFactory; - DstWorkloadFactory dstWorkloadFactory; - MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); -} - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(AclTypeConversions) -{ - arm_compute::Strides strides(1,2,3,4); - armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); - BOOST_TEST(convertedStrides[0] == 4); - BOOST_TEST(convertedStrides[1] == 3); - BOOST_TEST(convertedStrides[2] == 2); - BOOST_TEST(convertedStrides[3] == 1); - - arm_compute::TensorShape shape(5,6,7,8); - armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); - BOOST_TEST(convertedshape[0] == 8); - BOOST_TEST(convertedshape[1] == 7); - BOOST_TEST(convertedshape[2] == 6); - BOOST_TEST(convertedshape[3] == 5); -} -#endif - -#if ARMCOMPUTECL_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) -{ - MemCopyTest(true); -} - -#endif // ARMCOMPUTECL_ENABLED - -#if ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) -{ - MemCopyTest(true); -} - -#endif // ARMCOMPUTENEON_ENABLED - -#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) -{ - MemCopyTest(true); -} - -#endif - -BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1