Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/NeonInterceptorScheduler.cpp  |  47 ---
-rw-r--r-- | src/armnn/NeonInterceptorScheduler.hpp  |  38 ---
-rw-r--r-- | src/armnn/NeonTimer.cpp                 |  63 ---
-rw-r--r-- | src/armnn/NeonTimer.hpp                 |  43 ---
-rw-r--r-- | src/armnn/OpenClTimer.cpp               | 105 ---
-rw-r--r-- | src/armnn/OpenClTimer.hpp               |  59 ---
-rw-r--r-- | src/armnn/test/CreateWorkloadClNeon.hpp | 108 ---
-rw-r--r-- | src/armnn/test/FP16SupportTest.cpp      | 115 ---
-rw-r--r-- | src/armnn/test/NeonTimerTest.cpp        | 103 ---
-rw-r--r-- | src/armnn/test/OpenClTimerTest.cpp      | 137 ---
10 files changed, 0 insertions(+), 818 deletions(-)
diff --git a/src/armnn/NeonInterceptorScheduler.cpp b/src/armnn/NeonInterceptorScheduler.cpp
deleted file mode 100644
index 03b4670296..0000000000
--- a/src/armnn/NeonInterceptorScheduler.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "NeonInterceptorScheduler.hpp"
-
-#include <boost/assert.hpp>
-
-namespace armnn{
-
-NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler)
-    : m_Kernels(nullptr), m_RealScheduler(realScheduler)
-{
-}
-
-void NeonInterceptorScheduler::set_num_threads(unsigned int numThreads)
-{
-    m_RealScheduler.set_num_threads(numThreads);
-}
-
-unsigned int NeonInterceptorScheduler::num_threads() const
-{
-    return m_RealScheduler.num_threads();
-}
-
-void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints)
-{
-    WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now();
-    m_RealScheduler.schedule(kernel, hints.split_dimension());
-    WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now();
-
-    const auto delta = std::chrono::duration<double, std::micro>(stopTime - startTime);
-    m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US);
-}
-
-void NeonInterceptorScheduler::run_workloads(std::vector <Workload>& workloads)
-{
-    WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now();
-    m_RealScheduler.run_tagged_workloads(workloads, nullptr);
-    WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now();
-
-    const auto delta = std::chrono::duration<double, std::micro>(stopTime - startTime);
-    m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US);
-}
-
-} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/NeonInterceptorScheduler.hpp b/src/armnn/NeonInterceptorScheduler.hpp
deleted file mode 100644
index f33b79a2da..0000000000
--- a/src/armnn/NeonInterceptorScheduler.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "NeonTimer.hpp"
-#include "WallClockTimer.hpp"
-
-#include <arm_compute/runtime/IScheduler.h>
-#include <arm_compute/runtime/Scheduler.h>
-#include <arm_compute/core/CPP/ICPPKernel.h>
-
-namespace armnn
-{
-
-class NeonInterceptorScheduler : public arm_compute::IScheduler
-{
-public:
-    NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler);
-    ~NeonInterceptorScheduler() = default;
-
-    void set_num_threads(unsigned int numThreads) override;
-
-    unsigned int num_threads() const override;
-
-    void schedule(arm_compute::ICPPKernel *kernel, const Hints &hints) override;
-
-    void run_workloads(std::vector<Workload> &workloads) override;
-
-    void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; }
-    NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; }
-private:
-    NeonTimer::KernelMeasurements* m_Kernels;
-    arm_compute::IScheduler& m_RealScheduler;
-};
-
-} // namespace armnn
diff --git a/src/armnn/NeonTimer.cpp b/src/armnn/NeonTimer.cpp
deleted file mode 100644
index 219edc9680..0000000000
--- a/src/armnn/NeonTimer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "NeonTimer.hpp"
-#include "NeonInterceptorScheduler.hpp"
-
-#include <memory>
-
-#include <boost/assert.hpp>
-#include <boost/format.hpp>
-
-namespace armnn
-{
-namespace
-{
-static thread_local auto g_Interceptor = std::make_shared<NeonInterceptorScheduler>(arm_compute::Scheduler::get());
-}
-
-void NeonTimer::Start()
-{
-    m_Kernels.clear();
-    BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr);
-    g_Interceptor->SetKernels(&m_Kernels);
-
-    m_RealSchedulerType = arm_compute::Scheduler::get_type();
-    //Note: We can't currently replace a custom scheduler
-    if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM)
-    {
-        // Keep the real schedule and add NeonInterceptorScheduler as an interceptor
-        m_RealScheduler = &arm_compute::Scheduler::get();
-        arm_compute::Scheduler::set(std::static_pointer_cast<arm_compute::IScheduler>(g_Interceptor));
-    }
-}
-
-void NeonTimer::Stop()
-{
-    // Restore real scheduler
-    g_Interceptor->SetKernels(nullptr);
-    arm_compute::Scheduler::set(m_RealSchedulerType);
-    m_RealScheduler = nullptr;
-}
-
-std::vector<Measurement> NeonTimer::GetMeasurements() const
-{
-    std::vector<Measurement> measurements = m_Kernels;
-    unsigned int kernel_number = 0;
-    for (auto & kernel : measurements)
-    {
-        std::string kernelName = std::string(this->GetName()) + "/" + std::to_string(kernel_number++) + ": " + kernel
-            .m_Name;
-        kernel.m_Name = kernelName;
-    }
-    return measurements;
-}
-
-const char* NeonTimer::GetName() const
-{
-    return "NeonKernelTimer";
-}
-
-}
diff --git a/src/armnn/NeonTimer.hpp b/src/armnn/NeonTimer.hpp
deleted file mode 100644
index 31d3e85a7c..0000000000
--- a/src/armnn/NeonTimer.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "Instrument.hpp"
-
-#include <arm_compute/runtime/IScheduler.h>
-#include <arm_compute/runtime/Scheduler.h>
-#include <arm_compute/core/CPP/ICPPKernel.h>
-
-#include <chrono>
-#include <map>
-#include <list>
-
-namespace armnn
-{
-
-class NeonTimer : public Instrument
-{
-public:
-    using KernelMeasurements = std::vector<Measurement>;
-
-    NeonTimer() = default;
-    ~NeonTimer() = default;
-
-    void Start() override;
-
-    void Stop() override;
-
-    std::vector<Measurement> GetMeasurements() const override;
-
-    const char* GetName() const override;
-
-private:
-    KernelMeasurements m_Kernels;
-    arm_compute::IScheduler* m_RealScheduler;
-    arm_compute::Scheduler::Type m_RealSchedulerType;
-};
-
-}
\ No newline at end of file
diff --git a/src/armnn/OpenClTimer.cpp b/src/armnn/OpenClTimer.cpp
deleted file mode 100644
index 57552d7bd9..0000000000
--- a/src/armnn/OpenClTimer.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "OpenClTimer.hpp"
-
-#include <string>
-#include <sstream>
-
-namespace armnn
-{
-
-OpenClTimer::OpenClTimer()
-{
-}
-
-void OpenClTimer::Start()
-{
-    m_Kernels.clear();
-
-    auto interceptor = [this]( cl_command_queue command_queue,
-                               cl_kernel kernel,
-                               cl_uint work_dim,
-                               const size_t *gwo,
-                               const size_t *gws,
-                               const size_t *lws,
-                               cl_uint num_events_in_wait_list,
-                               const cl_event * event_wait_list,
-                               cl_event * event)
-    {
-        cl_int retVal = 0;
-
-        // Get the name of the kernel
-        cl::Kernel retainedKernel(kernel, true);
-        std::stringstream ss;
-        ss << retainedKernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
-
-        // Embed workgroup sizes into the name
-        if(gws != nullptr)
-        {
-            ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
-        }
-        if(lws != nullptr)
-        {
-            ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
-        }
-
-        cl_event customEvent;
-
-        // Forward to original OpenCl function
-        retVal = m_OriginalEnqueueFunction( command_queue,
-                                            kernel,
-                                            work_dim,
-                                            gwo,
-                                            gws,
-                                            lws,
-                                            num_events_in_wait_list,
-                                            event_wait_list,
-                                            &customEvent);
-
-        // Store the Kernel info for later GetMeasurements() call
-        m_Kernels.emplace_back(ss.str(), customEvent);
-
-        return retVal;
-    };
-
-    m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
-    CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
-}
-
-void OpenClTimer::Stop()
-{
-    CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction;
-}
-
-std::vector<Measurement> OpenClTimer::GetMeasurements() const
-{
-    std::vector<Measurement> measurements;
-
-    cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>();
-
-    int idx = 0;
-    for (auto& kernel : m_Kernels)
-    {
-        std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name;
-
-        double timeUs = 0.0;
-        if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0)
-        {
-            // Wait for the event to finish before accessing profile results.
-            kernel.m_Event.wait();
-
-            cl_ulong start = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
-            cl_ulong end = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
-            timeUs = static_cast<double>(end - start) / 1000.0;
-        }
-
-        measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US);
-    }
-
-    return measurements;
-}
-
-} //namespace armnn
diff --git a/src/armnn/OpenClTimer.hpp b/src/armnn/OpenClTimer.hpp
deleted file mode 100644
index ca044a405e..0000000000
--- a/src/armnn/OpenClTimer.hpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "Instrument.hpp"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/core/CL/OpenCL.h"
-
-#include <vector>
-#include <list>
-
-namespace armnn
-{
-
-/// OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop().
-class OpenClTimer : public Instrument
-{
-public:
-    OpenClTimer();
-    ~OpenClTimer() = default;
-
-    /// Start the OpenCl timer
-    void Start() override;
-
-    /// Stop the OpenCl timer
-    void Stop() override;
-
-    /// Get the name of the timer
-    /// \return Name of the timer
-    const char* GetName() const override { return "OpenClKernelTimer"; }
-
-    /// Get the recorded measurements. This will be a list of the execution durations for all the OpenCl kernels.
-    /// \return Recorded measurements
-    std::vector<Measurement> GetMeasurements() const override;
-
-private:
-    using CLScheduler = arm_compute::CLScheduler;
-    using CLSymbols = arm_compute::CLSymbols;
-    using ClEvent = cl::Event;
-    using ClEnqueueFunc = decltype(CLSymbols::clEnqueueNDRangeKernel_ptr);
-
-    /// Stores info about the OpenCl kernel
-    struct KernelInfo
-    {
-        KernelInfo(const std::string& name, cl_event& event) : m_Name(name), m_Event(event) {}
-
-        std::string m_Name;
-        ClEvent m_Event;
-    };
-
-    std::list<KernelInfo> m_Kernels; ///< List of all kernels executed
-    ClEnqueueFunc m_OriginalEnqueueFunction; ///< Keep track of original OpenCl function
-};
-
-} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp
deleted file mode 100644
index 56de085f8e..0000000000
--- a/src/armnn/test/CreateWorkloadClNeon.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "CreateWorkload.hpp"
-
-#include <backends/MemCopyWorkload.hpp>
-#include <backends/reference/RefWorkloadFactory.hpp>
-
-#if ARMCOMPUTECL_ENABLED
-#include <backends/cl/ClTensorHandle.hpp>
-#endif
-
-#if ARMCOMPUTENEON_ENABLED
-#include <backends/neon/NeonTensorHandle.hpp>
-#endif
-
-
-using namespace armnn;
-
-namespace
-{
-
-using namespace std;
-
-template<typename IComputeTensorHandle>
-boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle,
-                                                             std::initializer_list<unsigned int> expectedDimensions)
-{
-    arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info();
-
-    auto infoNumDims = info->num_dimensions();
-    auto numExpectedDims = expectedDimensions.size();
-    if (infoNumDims != numExpectedDims)
-    {
-        boost::test_tools::predicate_result res(false);
-        res.message() << "Different number of dimensions [" << info->num_dimensions()
-                      << "!=" << expectedDimensions.size() << "]";
-        return res;
-    }
-
-    size_t i = info->num_dimensions() - 1;
-
-    for (unsigned int expectedDimension : expectedDimensions)
-    {
-        if (info->dimension(i) != expectedDimension)
-        {
-            boost::test_tools::predicate_result res(false);
-            res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]";
-            return res;
-        }
-
-        i--;
-    }
-
-    return true;
-}
-
-template<typename IComputeTensorHandle>
-void CreateMemCopyWorkloads(IWorkloadFactory& factory)
-{
-    Graph graph;
-    RefWorkloadFactory refFactory;
-
-    // Creates the layers we're testing.
-    Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1");
-    Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2");
-
-    // Creates extra layers.
-    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
-    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
-
-    // Connects up.
-    TensorInfo tensorInfo({2, 3}, DataType::Float32);
-    Connect(input, layer1, tensorInfo);
-    Connect(layer1, layer2, tensorInfo);
-    Connect(layer2, output, tensorInfo);
-
-    input->CreateTensorHandles(graph, refFactory);
-    layer1->CreateTensorHandles(graph, factory);
-    layer2->CreateTensorHandles(graph, refFactory);
-    output->CreateTensorHandles(graph, refFactory);
-
-    // make the workloads and check them
-    auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory);
-    auto workload2 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer2, graph, refFactory);
-
-    MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
-    BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
-    BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1);
-    auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor1.m_Inputs[0]);
-    auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
-    BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
-    BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3}));
-
-
-    MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData();
-    BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1);
-    BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1);
-    auto inputHandle2 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
-    auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor2.m_Outputs[0]);
-    BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3}));
-    BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
-}
-
-} //namespace
\ No newline at end of file
diff --git a/src/armnn/test/FP16SupportTest.cpp b/src/armnn/test/FP16SupportTest.cpp
deleted file mode 100644
index 2706d1f363..0000000000
--- a/src/armnn/test/FP16SupportTest.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Descriptors.hpp>
-#include <armnn/IRuntime.hpp>
-#include <armnn/INetwork.hpp>
-#include <armnnUtils/Half.hpp>
-
-#include <Graph.hpp>
-#include <Optimizer.hpp>
-#include <backends/CpuTensorHandle.hpp>
-#include <backends/test/QuantizeHelper.hpp>
-
-#include <boost/core/ignore_unused.hpp>
-#include <boost/test/unit_test.hpp>
-
-#include <set>
-
-using namespace armnn;
-
-BOOST_AUTO_TEST_SUITE(Fp16Support)
-
-BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport)
-{
-    Graph graph;
-
-    Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1");
-    Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2");
-
-    Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition");
-    Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
-
-    TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16);
-    inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
-    inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
-    additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
-    inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
-    additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
-
-    BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
-    BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
-    BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
-
-}
-
-BOOST_AUTO_TEST_CASE(Fp16AdditionTest)
-{
-    using namespace half_float::literal;
-    // Create runtime in which test will run
-    IRuntime::CreationOptions options;
-    IRuntimePtr runtime(IRuntime::Create(options));
-
-    // Builds up the structure of the network.
-    INetworkPtr net(INetwork::Create());
-
-
-    IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
-    IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
-    IConnectableLayer* additionLayer = net->AddAdditionLayer();
-    IConnectableLayer* outputLayer = net->AddOutputLayer(0);
-
-    inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
-    inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
-    additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    //change to float16
-    TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16);
-    inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-    inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-    additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
-
-    // optimize the network
-    std::vector<Compute> backends = {Compute::GpuAcc};
-    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
-
-    // Loads it into the runtime.
-
-    NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
-
-    std::vector<Half> input1Data
-    {
-        1.0_h, 2.0_h, 3.0_h, 4.0_h
-    };
-
-    std::vector<Half> input2Data
-    {
-        100.0_h, 200.0_h, 300.0_h, 400.0_h
-    };
-
-    InputTensors inputTensors
-    {
-        {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
-        {1,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())}
-    };
-
-    std::vector<Half> outputData(input1Data.size());
-    OutputTensors outputTensors
-    {
-        {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
-    };
-
-    // Does the inference.
-    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
-
-    // Checks the results.
-    BOOST_TEST(outputData == std::vector<Half>({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/NeonTimerTest.cpp b/src/armnn/test/NeonTimerTest.cpp
deleted file mode 100644
index 6d0429c8b9..0000000000
--- a/src/armnn/test/NeonTimerTest.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <NeonTimer.hpp>
-#include "TensorHelpers.hpp"
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-#include <backends/CpuTensorHandle.hpp>
-#include <backends/neon/NeonWorkloadFactory.hpp>
-#include <backends/WorkloadFactory.hpp>
-#include <backends/test/LayerTests.hpp>
-#include <backends/test/TensorCopyUtils.hpp>
-#include <backends/test/WorkloadTestUtils.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include <cstdlib>
-#include <algorithm>
-
-using namespace armnn;
-
-BOOST_AUTO_TEST_SUITE(NeonTimerInstrument)
-
-
-BOOST_AUTO_TEST_CASE(NeonTimerGetName)
-{
-    NeonTimer neonTimer;
-    BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer");
-}
-
-BOOST_AUTO_TEST_CASE(NeonTimerMeasure)
-{
-    NeonWorkloadFactory workloadFactory;
-
-    unsigned int inputWidth = 4000u;
-    unsigned int inputHeight = 5000u;
-    unsigned int inputChannels = 1u;
-    unsigned int inputBatchSize = 1u;
-
-    float upperBound = 1.0f;
-    float lowerBound = -1.0f;
-
-    size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize;
-    std::vector<float> inputData(inputSize, 0.f);
-    std::generate(inputData.begin(), inputData.end(), [](){
-        return (static_cast<float>(rand()) / static_cast<float>(RAND_MAX / 3)) + 1.f; });
-
-    unsigned int outputWidth = inputWidth;
-    unsigned int outputHeight = inputHeight;
-    unsigned int outputChannels = inputChannels;
-    unsigned int outputBatchSize = inputBatchSize;
-
-    armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
-                                      armnn::GetDataType<float>());
-
-    armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
-                                       armnn::GetDataType<float>());
-
-    LayerTestResult<float, 4> result(inputTensorInfo);
-
-    auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    // Setup bounded ReLu
-    armnn::ActivationQueueDescriptor descriptor;
-    armnn::WorkloadInfo workloadInfo;
-    AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get());
-
-    descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu;
-    descriptor.m_Parameters.m_A = upperBound;
-    descriptor.m_Parameters.m_B = lowerBound;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    NeonTimer neonTimer;
-    // Start the timer.
-    neonTimer.Start();
-    // Execute the workload.
-    workload->Execute();
-    // Stop the timer.
-    neonTimer.Stop();
-
-    std::vector<Measurement> measurements = neonTimer.GetMeasurements();
-
-    BOOST_CHECK_EQUAL(measurements.size(), 2);
-    BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel");
-    BOOST_CHECK(measurements[0].m_Value > 0.0);
-    BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel");
-    BOOST_CHECK(measurements[1].m_Value > 0.0);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/OpenClTimerTest.cpp b/src/armnn/test/OpenClTimerTest.cpp
deleted file mode 100644
index 76cffec4f3..0000000000
--- a/src/armnn/test/OpenClTimerTest.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7
-
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include <backends/cl/ClContextControl.hpp>
-#include <backends/cl/ClWorkloadFactory.hpp>
-#include <backends/CpuTensorHandle.hpp>
-#include <boost/format.hpp>
-#include <iostream>
-#include <OpenClTimer.hpp>
-#include <backends/test/TensorCopyUtils.hpp>
-#include "TensorHelpers.hpp"
-#include <boost/test/unit_test.hpp>
-#include <backends/WorkloadFactory.hpp>
-#include <backends/test/WorkloadTestUtils.hpp>
-
-using namespace armnn;
-
-struct OpenClFixture
-{
-    // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
-    // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
-    // times from OpenClTimer.
-    OpenClFixture() : m_ClContextControl(nullptr, true) {}
-    ~OpenClFixture() {}
-
-    ClContextControl m_ClContextControl;
-};
-
-BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture)
-using FactoryType = ClWorkloadFactory;
-
-BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm)
-{
-    ClWorkloadFactory workloadFactory;
-
-    const unsigned int width = 2;
-    const unsigned int height = 3;
-    const unsigned int channels = 2;
-    const unsigned int num = 1;
-    int32_t qOffset = 0;
-    float qScale = 0.f;
-
-    TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType<float>());
-    TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType<float>());
-    TensorInfo tensorInfo({channels}, GetDataType<float>());
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(IsQuantizedType<float>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        tensorInfo.SetQuantizationScale(qScale);
-        tensorInfo.SetQuantizationOffset(qOffset);
-    }
-
-    auto input = MakeTensor<float, 4>(inputTensorInfo,
-        QuantizedVector<float>(qScale, qOffset,
-        {
-            1.f, 4.f,
-            4.f, 2.f,
-            1.f, 6.f,
-
-            1.f, 1.f,
-            4.f, 1.f,
-            -2.f, 4.f
-        }));
-    // these values are per-channel of the input
-    auto mean = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, -2}));
-    auto variance = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {4, 9}));
-    auto beta = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, 2}));
-    auto gamma = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {2, 1}));
-
-    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    BatchNormalizationQueueDescriptor data;
-    WorkloadInfo info;
-    ScopedCpuTensorHandle meanTensor(tensorInfo);
-    ScopedCpuTensorHandle varianceTensor(tensorInfo);
-    ScopedCpuTensorHandle betaTensor(tensorInfo);
-    ScopedCpuTensorHandle gammaTensor(tensorInfo);
-
-    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
-    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
-    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
-    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Mean = &meanTensor;
-    data.m_Variance = &varianceTensor;
-    data.m_Beta = &betaTensor;
-    data.m_Gamma = &gammaTensor;
-    data.m_Parameters.m_Eps = 0.0f;
-
-    // for each channel:
-    // substract mean, divide by standard deviation (with an epsilon to avoid div by 0)
-    // multiply by gamma and add beta
-    std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    OpenClTimer openClTimer;
-
-    BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer");
-
-    //Start the timer
-    openClTimer.Start();
-
-    //Execute the workload
-    workload->Execute();
-
-    //Stop the timer
-    openClTimer.Stop();
-
-    BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1);
-
-    BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name,
-                      "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]");
-
-    BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0);
-
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif //aarch64 or x86_64
\ No newline at end of file