From 3b278e9261bd0de67c82f7d6c36731f118124f52 Mon Sep 17 00:00:00 2001 From: Aron Virginas-Tar Date: Fri, 12 Oct 2018 13:00:55 +0100 Subject: IVGCVSW-1938: Move backend-specific source files to the corresponding backend Change-Id: I558a9a007604afc55e536d877f8da7d0215cc9c3 --- src/backends/aclCommon/CMakeLists.txt | 2 + src/backends/aclCommon/common.cmake | 1 + src/backends/aclCommon/test/CMakeLists.txt | 14 ++ .../aclCommon/test/CreateWorkloadClNeon.hpp | 107 ++++++++++++ src/backends/aclCommon/test/MemCopyTests.cpp | 62 +++++++ src/backends/cl/CMakeLists.txt | 5 + src/backends/cl/OpenClTimer.cpp | 105 ++++++++++++ src/backends/cl/OpenClTimer.hpp | 59 +++++++ src/backends/cl/backend.mk | 1 + src/backends/cl/test/CMakeLists.txt | 2 + src/backends/cl/test/ClCreateWorkloadTests.cpp | 6 +- src/backends/cl/test/ClMemCopyTests.cpp | 39 +++++ src/backends/cl/test/Fp16SupportTest.cpp | 112 +++++++++++++ src/backends/cl/test/OpenClTimerTest.cpp | 143 ++++++++++++++++ src/backends/cl/workloads/ClWorkloadUtils.hpp | 4 +- src/backends/neon/CMakeLists.txt | 4 + src/backends/neon/NeonInterceptorScheduler.cpp | 47 ++++++ src/backends/neon/NeonInterceptorScheduler.hpp | 38 +++++ src/backends/neon/NeonTimer.cpp | 63 ++++++++ src/backends/neon/NeonTimer.hpp | 43 +++++ src/backends/neon/backend.mk | 2 + src/backends/neon/test/CMakeLists.txt | 2 + src/backends/neon/test/NeonCreateWorkloadTests.cpp | 5 +- src/backends/neon/test/NeonMemCopyTests.cpp | 39 +++++ src/backends/neon/test/NeonTimerTest.cpp | 105 ++++++++++++ src/backends/neon/workloads/NeonWorkloadUtils.hpp | 4 +- src/backends/test/LayerTests.cpp | 5 - src/backends/test/MemCopyTestImpl.hpp | 84 ++++++++++ src/backends/test/MemCopyTests.cpp | 180 --------------------- 29 files changed, 1090 insertions(+), 193 deletions(-) create mode 100644 src/backends/aclCommon/test/CMakeLists.txt create mode 100644 src/backends/aclCommon/test/CreateWorkloadClNeon.hpp create mode 100644 src/backends/aclCommon/test/MemCopyTests.cpp create mode 100644 src/backends/cl/OpenClTimer.cpp create mode 100644 src/backends/cl/OpenClTimer.hpp create mode 100644 src/backends/cl/test/ClMemCopyTests.cpp create mode 100644 src/backends/cl/test/Fp16SupportTest.cpp create mode 100644 src/backends/cl/test/OpenClTimerTest.cpp create mode 100644 src/backends/neon/NeonInterceptorScheduler.cpp create mode 100644 src/backends/neon/NeonInterceptorScheduler.hpp create mode 100644 src/backends/neon/NeonTimer.cpp create mode 100644 src/backends/neon/NeonTimer.hpp create mode 100644 src/backends/neon/test/NeonMemCopyTests.cpp create mode 100644 src/backends/neon/test/NeonTimerTest.cpp create mode 100644 src/backends/test/MemCopyTestImpl.hpp delete mode 100644 src/backends/test/MemCopyTests.cpp (limited to 'src/backends') diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt index 42f914263a..6f99f4f146 100644 --- a/src/backends/aclCommon/CMakeLists.txt +++ b/src/backends/aclCommon/CMakeLists.txt @@ -9,6 +9,8 @@ list(APPEND armnnAclCommon_sources ArmComputeUtils.hpp ) +add_subdirectory(test) + add_library(armnnAclCommon STATIC ${armnnAclCommon_sources}) target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src) target_include_directories(armnnAclCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) diff --git a/src/backends/aclCommon/common.cmake b/src/backends/aclCommon/common.cmake index d9d035f307..89be236a7f 100644 --- a/src/backends/aclCommon/common.cmake +++ b/src/backends/aclCommon/common.cmake @@ -6,4 +6,5 @@ if(ARMCOMPUTENEON OR ARMCOMPUTECL) add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/aclCommon) list(APPEND armnnLibraries armnnAclCommon) + list(APPEND armnnUnitTestLibraries armnnAclCommonUnitTests) endif() diff --git a/src/backends/aclCommon/test/CMakeLists.txt b/src/backends/aclCommon/test/CMakeLists.txt new file mode 100644 index 0000000000..98008edeb5 --- /dev/null +++ b/src/backends/aclCommon/test/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnAclCommonUnitTests_sources + CreateWorkloadClNeon.hpp + MemCopyTests.cpp +) + +add_library(armnnAclCommonUnitTests OBJECT ${armnnAclCommonUnitTests_sources}) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnAclCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) \ No newline at end of file diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp new file mode 100644 index 0000000000..a79cfe6b0d --- /dev/null +++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp @@ -0,0 +1,107 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include +#include + +#if ARMCOMPUTECL_ENABLED +#include +#endif + +#if ARMCOMPUTENEON_ENABLED +#include +#endif + +using namespace armnn; + +namespace +{ + +using namespace std; + +template +boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle, + std::initializer_list expectedDimensions) +{ + arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info(); + + auto infoNumDims = info->num_dimensions(); + auto numExpectedDims = expectedDimensions.size(); + if (infoNumDims != numExpectedDims) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different number of dimensions [" << info->num_dimensions() + << "!=" << expectedDimensions.size() << "]"; + return res; + } + + size_t i = info->num_dimensions() - 1; + + for (unsigned int expectedDimension : expectedDimensions) + { + if (info->dimension(i) != expectedDimension) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]"; + return res; + } + + i--; + } + + return true; +} + +template +void CreateMemCopyWorkloads(IWorkloadFactory& factory) +{ + Graph graph; + RefWorkloadFactory refFactory; + + // Creates the layers we're testing. + Layer* const layer1 = graph.AddLayer("layer1"); + Layer* const layer2 = graph.AddLayer("layer2"); + + // Creates extra layers. + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // Connects up. + TensorInfo tensorInfo({2, 3}, DataType::Float32); + Connect(input, layer1, tensorInfo); + Connect(layer1, layer2, tensorInfo); + Connect(layer2, output, tensorInfo); + + input->CreateTensorHandles(graph, refFactory); + layer1->CreateTensorHandles(graph, factory); + layer2->CreateTensorHandles(graph, refFactory); + output->CreateTensorHandles(graph, refFactory); + + // make the workloads and check them + auto workload1 = MakeAndCheckWorkload(*layer1, graph, factory); + auto workload2 = MakeAndCheckWorkload(*layer2, graph, refFactory); + + MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData(); + BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1); + auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Inputs[0]); + auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor1.m_Outputs[0]); + BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(CompareTensorHandleShape(outputHandle1, {2, 3})); + + + MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData(); + BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1); + auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Inputs[0]); + auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor2.m_Outputs[0]); + BOOST_TEST(CompareTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); +} + +} //namespace \ No newline at end of file diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp new file mode 100644 index 0000000000..8ecdb1014d --- /dev/null +++ b/src/backends/aclCommon/test/MemCopyTests.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include + +#include + +BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) + +BOOST_AUTO_TEST_CASE(AclTypeConversions) +{ + arm_compute::Strides strides(1, 2, 3, 4); + armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); + + BOOST_TEST(convertedStrides[0] == 4); + BOOST_TEST(convertedStrides[1] == 3); + BOOST_TEST(convertedStrides[2] == 2); + BOOST_TEST(convertedStrides[3] == 1); + + arm_compute::TensorShape shape(5, 6, 7, 8); + armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); + + BOOST_TEST(convertedshape[0] == 8); + BOOST_TEST(convertedshape[1] == 7); + BOOST_TEST(convertedshape[2] == 6); + BOOST_TEST(convertedshape[3] == 5); +} + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt index 04da6ddcff..2f32081dfe 100644 --- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -15,6 +15,11 @@ list(APPEND armnnClBackend_sources ) if(ARMCOMPUTECL) + list(APPEND armnnClBackend_sources + OpenClTimer.cpp + OpenClTimer.hpp + ) + add_subdirectory(workloads) add_subdirectory(test) endif() diff --git a/src/backends/cl/OpenClTimer.cpp b/src/backends/cl/OpenClTimer.cpp new file mode 100644 index 0000000000..57552d7bd9 --- /dev/null +++ b/src/backends/cl/OpenClTimer.cpp @@ -0,0 +1,105 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "OpenClTimer.hpp" + +#include +#include + +namespace armnn +{ + +OpenClTimer::OpenClTimer() +{ +} + +void OpenClTimer::Start() +{ + m_Kernels.clear(); + + auto interceptor = [this]( cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t *gwo, + const size_t *gws, + const size_t *lws, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) + { + cl_int retVal = 0; + + // Get the name of the kernel + cl::Kernel retainedKernel(kernel, true); + std::stringstream ss; + ss << retainedKernel.getInfo(); + + // Embed workgroup sizes into the name + if(gws != nullptr) + { + ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; + } + if(lws != nullptr) + { + ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; + } + + cl_event customEvent; + + // Forward to original OpenCl function + retVal = m_OriginalEnqueueFunction( command_queue, + kernel, + work_dim, + gwo, + gws, + lws, + num_events_in_wait_list, + event_wait_list, + &customEvent); + + // Store the Kernel info for later GetMeasurements() call + m_Kernels.emplace_back(ss.str(), customEvent); + + return retVal; + }; + + m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr; + CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; +} + +void OpenClTimer::Stop() +{ + CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction; +} + +std::vector OpenClTimer::GetMeasurements() const +{ + std::vector measurements; + + cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo(); + + int idx = 0; + for (auto& kernel : m_Kernels) + { + std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name; + + double timeUs = 0.0; + if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0) + { + // Wait for the event to finish before accessing profile results. + kernel.m_Event.wait(); + + cl_ulong start = kernel.m_Event.getProfilingInfo(); + cl_ulong end = kernel.m_Event.getProfilingInfo(); + timeUs = static_cast(end - start) / 1000.0; + } + + measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US); + } + + return measurements; +} + +} //namespace armnn diff --git a/src/backends/cl/OpenClTimer.hpp b/src/backends/cl/OpenClTimer.hpp new file mode 100644 index 0000000000..a7ae1387d9 --- /dev/null +++ b/src/backends/cl/OpenClTimer.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Instrument.hpp" + +#include +#include + +#include +#include + +namespace armnn +{ + +/// OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). +class OpenClTimer : public Instrument +{ +public: + OpenClTimer(); + ~OpenClTimer() = default; + + /// Start the OpenCl timer + void Start() override; + + /// Stop the OpenCl timer + void Stop() override; + + /// Get the name of the timer + /// \return Name of the timer + const char* GetName() const override { return "OpenClKernelTimer"; } + + /// Get the recorded measurements. This will be a list of the execution durations for all the OpenCl kernels. + /// \return Recorded measurements + std::vector GetMeasurements() const override; + +private: + using CLScheduler = arm_compute::CLScheduler; + using CLSymbols = arm_compute::CLSymbols; + using ClEvent = cl::Event; + using ClEnqueueFunc = decltype(CLSymbols::clEnqueueNDRangeKernel_ptr); + + /// Stores info about the OpenCl kernel + struct KernelInfo + { + KernelInfo(const std::string& name, cl_event& event) : m_Name(name), m_Event(event) {} + + std::string m_Name; + ClEvent m_Event; + }; + + std::list m_Kernels; ///< List of all kernels executed + ClEnqueueFunc m_OriginalEnqueueFunction; ///< Keep track of original OpenCl function +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 4375d9496c..205f7b5415 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -12,6 +12,7 @@ BACKEND_SOURCES := \ ClContextControl.cpp \ ClLayerSupport.cpp \ ClWorkloadFactory.cpp \ + OpenClTimer.cpp \ workloads/ClActivationWorkload.cpp \ workloads/ClAdditionWorkload.cpp \ workloads/ClBatchNormalizationFloatWorkload.cpp \ diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index d365290a6c..4936a78645 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -8,6 +8,8 @@ list(APPEND armnnClBackendUnitTests_sources ClCreateWorkloadTests.cpp ClLayerSupportTests.cpp ClLayerTests.cpp + ClMemCopyTests.cpp + OpenClTimerTest.cpp ) add_library(armnnClBackendUnitTests OBJECT ${armnnClBackendUnitTests_sources}) diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 66c2c2aa40..526dc68fc5 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -6,13 +6,15 @@ #include "ClContextControlFixture.hpp" #include + +#include + #include #include #include #include -#include -#include +#include boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, std::initializer_list expectedDimensions) diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp new file mode 100644 index 0000000000..af8a36d6c0 --- /dev/null +++ b/src/backends/cl/test/ClMemCopyTests.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(ClMemCopy) + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp new file mode 100644 index 0000000000..90bef3647b --- /dev/null +++ b/src/backends/cl/test/Fp16SupportTest.cpp @@ -0,0 +1,112 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Fp16Support) + +BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport) +{ + Graph graph; + + Layer* const inputLayer1 = graph.AddLayer(1, "input1"); + Layer* const inputLayer2 = graph.AddLayer(2, "input2"); + + Layer* const additionLayer = graph.AddLayer("addition"); + Layer* const outputLayer = graph.AddLayer(0, "output"); + + TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + + BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); +} + +BOOST_AUTO_TEST_CASE(Fp16AdditionTest) +{ + using namespace half_float::literal; + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* inputLayer1 = net->AddInputLayer(0); + IConnectableLayer* inputLayer2 = net->AddInputLayer(1); + IConnectableLayer* additionLayer = net->AddAdditionLayer(); + IConnectableLayer* outputLayer = net->AddOutputLayer(0); + + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + //change to float16 + TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16); + inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + + // optimize the network + std::vector backends = {Compute::GpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + std::vector input1Data + { + 1.0_h, 2.0_h, 3.0_h, 4.0_h + }; + + std::vector input2Data + { + 100.0_h, 200.0_h, 300.0_h, 400.0_h + }; + + InputTensors inputTensors + { + {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, + {1,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} + }; + + std::vector outputData(input1Data.size()); + OutputTensors outputTensors + { + {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + BOOST_TEST(outputData == std::vector({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp new file mode 100644 index 0000000000..70ceac2a3f --- /dev/null +++ b/src/backends/cl/test/OpenClTimerTest.cpp @@ -0,0 +1,143 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7 + +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include + +using namespace armnn; + +struct OpenClFixture +{ + // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case. + // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution + // times from OpenClTimer. + OpenClFixture() : m_ClContextControl(nullptr, true) {} + ~OpenClFixture() {} + + ClContextControl m_ClContextControl; +}; + +BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture) +using FactoryType = ClWorkloadFactory; + +BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm) +{ + ClWorkloadFactory workloadFactory; + + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + int32_t qOffset = 0; + float qScale = 0.f; + + TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType()); + TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType()); + TensorInfo tensorInfo({channels}, GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + tensorInfo.SetQuantizationScale(qScale); + tensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, + { + 1.f, 4.f, + 4.f, 2.f, + 1.f, 6.f, + + 1.f, 1.f, + 4.f, 1.f, + -2.f, 4.f + })); + // these values are per-channel of the input + auto mean = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, -2})); + auto variance = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {4, 9})); + auto beta = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, 2})); + auto gamma = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {2, 1})); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + BatchNormalizationQueueDescriptor data; + WorkloadInfo info; + ScopedCpuTensorHandle meanTensor(tensorInfo); + ScopedCpuTensorHandle varianceTensor(tensorInfo); + ScopedCpuTensorHandle betaTensor(tensorInfo); + ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.0f; + + // for each channel: + // substract mean, divide by standard deviation (with an epsilon to avoid div by 0) + // multiply by gamma and add beta + std::unique_ptr workload = workloadFactory.CreateBatchNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + OpenClTimer openClTimer; + + BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer"); + + //Start the timer + openClTimer.Start(); + + //Execute the workload + workload->Execute(); + + //Stop the timer + openClTimer.Stop(); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name, + "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]"); + + BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0); + +} + +BOOST_AUTO_TEST_SUITE_END() + +#endif //aarch64 or x86_64 \ No newline at end of file diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp index af4ccd0bb8..c765c63dce 100644 --- a/src/backends/cl/workloads/ClWorkloadUtils.hpp +++ b/src/backends/cl/workloads/ClWorkloadUtils.hpp @@ -5,11 +5,11 @@ #pragma once #include + #include +#include #include -#include "OpenClTimer.hpp" - #define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ name, \ diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt index 93c7955a5f..152955aa06 100644 --- a/src/backends/neon/CMakeLists.txt +++ b/src/backends/neon/CMakeLists.txt @@ -7,11 +7,15 @@ if(ARMCOMPUTENEON) list(APPEND armnnNeonBackend_sources NeonBackend.cpp NeonBackend.hpp + NeonInterceptorScheduler.hpp + NeonInterceptorScheduler.cpp NeonLayerSupport.cpp NeonLayerSupport.hpp NeonWorkloadFactory.cpp NeonWorkloadFactory.hpp NeonTensorHandle.hpp + NeonTimer.hpp + NeonTimer.cpp ) add_subdirectory(workloads) diff --git a/src/backends/neon/NeonInterceptorScheduler.cpp b/src/backends/neon/NeonInterceptorScheduler.cpp new file mode 100644 index 0000000000..03b4670296 --- /dev/null +++ b/src/backends/neon/NeonInterceptorScheduler.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonInterceptorScheduler.hpp" + +#include + +namespace armnn{ + +NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler) + : m_Kernels(nullptr), m_RealScheduler(realScheduler) +{ +} + +void NeonInterceptorScheduler::set_num_threads(unsigned int numThreads) +{ + m_RealScheduler.set_num_threads(numThreads); +} + +unsigned int NeonInterceptorScheduler::num_threads() const +{ + return m_RealScheduler.num_threads(); +} + +void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints) +{ + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); + m_RealScheduler.schedule(kernel, hints.split_dimension()); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); + + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US); +} + +void NeonInterceptorScheduler::run_workloads(std::vector & workloads) +{ + WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now(); + m_RealScheduler.run_tagged_workloads(workloads, nullptr); + WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now(); + + const auto delta = std::chrono::duration(stopTime - startTime); + m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US); +} + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/neon/NeonInterceptorScheduler.hpp b/src/backends/neon/NeonInterceptorScheduler.hpp new file mode 100644 index 0000000000..f33b79a2da --- /dev/null +++ b/src/backends/neon/NeonInterceptorScheduler.hpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "NeonTimer.hpp" +#include "WallClockTimer.hpp" + +#include +#include +#include + +namespace armnn +{ + +class NeonInterceptorScheduler : public arm_compute::IScheduler +{ +public: + NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler); + ~NeonInterceptorScheduler() = default; + + void set_num_threads(unsigned int numThreads) override; + + unsigned int num_threads() const override; + + void schedule(arm_compute::ICPPKernel *kernel, const Hints &hints) override; + + void run_workloads(std::vector &workloads) override; + + void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; } + NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; } +private: + NeonTimer::KernelMeasurements* m_Kernels; + arm_compute::IScheduler& m_RealScheduler; +}; + +} // namespace armnn diff --git a/src/backends/neon/NeonTimer.cpp b/src/backends/neon/NeonTimer.cpp new file mode 100644 index 0000000000..219edc9680 --- /dev/null +++ b/src/backends/neon/NeonTimer.cpp @@ -0,0 +1,63 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonTimer.hpp" +#include "NeonInterceptorScheduler.hpp" + +#include + +#include +#include + +namespace armnn +{ +namespace +{ +static thread_local auto g_Interceptor = std::make_shared(arm_compute::Scheduler::get()); +} + +void NeonTimer::Start() +{ + m_Kernels.clear(); + BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr); + g_Interceptor->SetKernels(&m_Kernels); + + m_RealSchedulerType = arm_compute::Scheduler::get_type(); + //Note: We can't currently replace a custom scheduler + if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM) + { + // Keep the real schedule and add NeonInterceptorScheduler as an interceptor + m_RealScheduler = &arm_compute::Scheduler::get(); + arm_compute::Scheduler::set(std::static_pointer_cast(g_Interceptor)); + } +} + +void NeonTimer::Stop() +{ + // Restore real scheduler + g_Interceptor->SetKernels(nullptr); + arm_compute::Scheduler::set(m_RealSchedulerType); + m_RealScheduler = nullptr; +} + +std::vector NeonTimer::GetMeasurements() const +{ + std::vector measurements = m_Kernels; + unsigned int kernel_number = 0; + for (auto & kernel : measurements) + { + std::string kernelName = std::string(this->GetName()) + "/" + std::to_string(kernel_number++) + ": " + kernel + .m_Name; + kernel.m_Name = kernelName; + } + return measurements; +} + +const char* NeonTimer::GetName() const +{ + return "NeonKernelTimer"; +} + +} diff --git a/src/backends/neon/NeonTimer.hpp b/src/backends/neon/NeonTimer.hpp new file mode 100644 index 0000000000..31d3e85a7c --- /dev/null +++ b/src/backends/neon/NeonTimer.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Instrument.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace armnn +{ + +class NeonTimer : public Instrument +{ +public: + using KernelMeasurements = std::vector; + + NeonTimer() = default; + ~NeonTimer() = default; + + void Start() override; + + void Stop() override; + + std::vector GetMeasurements() const override; + + const char* GetName() const override; + +private: + KernelMeasurements m_Kernels; + arm_compute::IScheduler* m_RealScheduler; + arm_compute::Scheduler::Type m_RealSchedulerType; +}; + +} \ No newline at end of file diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 132328ba72..af83fb1321 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -9,7 +9,9 @@ BACKEND_SOURCES := \ NeonBackend.cpp \ + NeonInterceptorScheduler.cpp \ NeonLayerSupport.cpp \ + NeonTimer.cpp \ NeonWorkloadFactory.cpp \ workloads/NeonActivationWorkload.cpp \ workloads/NeonAdditionFloatWorkload.cpp \ diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt index 87da01e9e2..4a3380c3f9 100644 --- a/src/backends/neon/test/CMakeLists.txt +++ b/src/backends/neon/test/CMakeLists.txt @@ -7,6 +7,8 @@ list(APPEND armnnNeonBackendUnitTests_sources NeonCreateWorkloadTests.cpp NeonLayerSupportTests.cpp NeonLayerTests.cpp + NeonMemCopyTests.cpp + NeonTimerTest.cpp ) add_library(armnnNeonBackendUnitTests OBJECT ${armnnNeonBackendUnitTests_sources}) diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp index 2c4d0ae0f9..ec8fe803a1 100644 --- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp +++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp @@ -3,9 +3,10 @@ // SPDX-License-Identifier: MIT // -#include - #include + +#include + #include #include #include diff --git a/src/backends/neon/test/NeonMemCopyTests.cpp b/src/backends/neon/test/NeonMemCopyTests.cpp new file mode 100644 index 0000000000..ddb47343a1 --- /dev/null +++ b/src/backends/neon/test/NeonMemCopyTests.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(NeonMemCopy) + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) +{ + LayerTestResult result = MemCopyTest(false); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) +{ + LayerTestResult result = MemCopyTest(true); + BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/test/NeonTimerTest.cpp b/src/backends/neon/test/NeonTimerTest.cpp new file mode 100644 index 0000000000..06f19c6ec3 --- /dev/null +++ b/src/backends/neon/test/NeonTimerTest.cpp @@ -0,0 +1,105 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(NeonTimerInstrument) + + +BOOST_AUTO_TEST_CASE(NeonTimerGetName) +{ + NeonTimer neonTimer; + BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer"); +} + +BOOST_AUTO_TEST_CASE(NeonTimerMeasure) +{ + NeonWorkloadFactory workloadFactory; + + unsigned int inputWidth = 4000u; + unsigned int inputHeight = 5000u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1u; + + float upperBound = 1.0f; + float lowerBound = -1.0f; + + size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize; + std::vector inputData(inputSize, 0.f); + std::generate(inputData.begin(), inputData.end(), [](){ + return (static_cast(rand()) / static_cast(RAND_MAX / 3)) + 1.f; }); + + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + LayerTestResult result(inputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, inputData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + NeonTimer neonTimer; + // Start the timer. + neonTimer.Start(); + // Execute the workload. + workload->Execute(); + // Stop the timer. + neonTimer.Stop(); + + std::vector measurements = neonTimer.GetMeasurements(); + + BOOST_CHECK_EQUAL(measurements.size(), 2); + BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel"); + BOOST_CHECK(measurements[0].m_Value > 0.0); + BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel"); + BOOST_CHECK(measurements[1].m_Value > 0.0); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.hpp b/src/backends/neon/workloads/NeonWorkloadUtils.hpp index 22668f6f4b..c4accd6c89 100644 --- a/src/backends/neon/workloads/NeonWorkloadUtils.hpp +++ b/src/backends/neon/workloads/NeonWorkloadUtils.hpp @@ -5,9 +5,9 @@ #pragma once #include -#include -#include "NeonTimer.hpp" +#include +#include #include #include diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp index 4b50e4b5f9..d955e42c36 100755 --- a/src/backends/test/LayerTests.cpp +++ b/src/backends/test/LayerTests.cpp @@ -16,11 +16,6 @@ #include #include -#ifdef ARMCOMPUTECL_ENABLED -#include -#include -#endif - #include #include diff --git a/src/backends/test/MemCopyTestImpl.hpp b/src/backends/test/MemCopyTestImpl.hpp new file mode 100644 index 0000000000..dab7f47915 --- /dev/null +++ b/src/backends/test/MemCopyTestImpl.hpp @@ -0,0 +1,84 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "LayerTests.hpp" +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +#include + +#include + +namespace +{ + +LayerTestResult MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, + armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) +{ + const std::array shapeData = { { 1u, 1u, 6u, 5u } }; + const armnn::TensorShape tensorShape(4, shapeData.data()); + const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); + boost::multi_array inputData = MakeTensor(tensorInfo, std::vector( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + ); + + LayerTestResult ret(tensorInfo); + ret.outputExpected = inputData; + + boost::multi_array outputData(shapeData); + + auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); + auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); + + AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); + outputTensorHandle->Allocate(); + + armnn::MemCopyQueueDescriptor memCopyQueueDesc; + armnn::WorkloadInfo workloadInfo; + + const unsigned int origin[4] = {}; + + auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) + ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) + : std::move(inputTensorHandle); + auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) + ? dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) + : std::move(outputTensorHandle); + + AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); + AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); + + dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); + + CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); + ret.output = outputData; + + return ret; +} + +template +LayerTestResult MemCopyTest(bool withSubtensors) +{ + SrcWorkloadFactory srcWorkloadFactory; + DstWorkloadFactory dstWorkloadFactory; + + return MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); +} + +} // anonymous namespace diff --git a/src/backends/test/MemCopyTests.cpp b/src/backends/test/MemCopyTests.cpp deleted file mode 100644 index f66caffd92..0000000000 --- a/src/backends/test/MemCopyTests.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include -#include - -#include -#include -#if ARMCOMPUTECL_ENABLED -#include -#endif -#if ARMCOMPUTENEON_ENABLED -#include -#endif -#include -#include - -#include "TensorCopyUtils.hpp" -#include "WorkloadTestUtils.hpp" - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED -#include -#endif - -BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) - -void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, - bool withSubtensors) -{ - const std::array shapeData = { { 1u, 1u, 6u, 5u } }; - const armnn::TensorShape tensorShape(4, shapeData.data()); - const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); - boost::multi_array inputData = MakeTensor(tensorInfo, std::vector( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - }) - ); - - boost::multi_array outputData(shapeData); - - auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); - auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); - - AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); - outputTensorHandle->Allocate(); - - armnn::MemCopyQueueDescriptor memCopyQueueDesc; - armnn::WorkloadInfo workloadInfo; - - const unsigned int origin[4] = {}; - - auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) - ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) - : std::move(inputTensorHandle); - auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) - ? dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) - : std::move(outputTensorHandle); - - AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); - AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); - - dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); - - CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); - - BOOST_TEST(CompareTensors(inputData, outputData)); -} - -template -void MemCopyTest(bool withSubtensors) -{ - SrcWorkloadFactory srcWorkloadFactory; - DstWorkloadFactory dstWorkloadFactory; - MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); -} - -#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(AclTypeConversions) -{ - arm_compute::Strides strides(1,2,3,4); - armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); - BOOST_TEST(convertedStrides[0] == 4); - BOOST_TEST(convertedStrides[1] == 3); - BOOST_TEST(convertedStrides[2] == 2); - BOOST_TEST(convertedStrides[3] == 1); - - arm_compute::TensorShape shape(5,6,7,8); - armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); - BOOST_TEST(convertedshape[0] == 8); - BOOST_TEST(convertedshape[1] == 7); - BOOST_TEST(convertedshape[2] == 6); - BOOST_TEST(convertedshape[3] == 5); -} -#endif - -#if ARMCOMPUTECL_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) -{ - MemCopyTest(true); -} - -#endif // ARMCOMPUTECL_ENABLED - -#if ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) -{ - MemCopyTest(true); -} - -#endif // ARMCOMPUTENEON_ENABLED - -#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) -{ - MemCopyTest(false); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) -{ - MemCopyTest(true); -} - -BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) -{ - MemCopyTest(true); -} - -#endif - -BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1