aboutsummaryrefslogtreecommitdiff
path: root/src/backends/cl/test
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl/test')
-rw-r--r--  src/backends/cl/test/CMakeLists.txt              |   2
-rw-r--r--  src/backends/cl/test/ClCreateWorkloadTests.cpp   |   6
-rw-r--r--  src/backends/cl/test/ClMemCopyTests.cpp          |  39
-rw-r--r--  src/backends/cl/test/Fp16SupportTest.cpp         | 112
-rw-r--r--  src/backends/cl/test/OpenClTimerTest.cpp         | 143
5 files changed, 300 insertions, 2 deletions
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index d365290a6c..4936a78645 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -8,6 +8,8 @@ list(APPEND armnnClBackendUnitTests_sources
ClCreateWorkloadTests.cpp
ClLayerSupportTests.cpp
ClLayerTests.cpp
+ ClMemCopyTests.cpp
+ OpenClTimerTest.cpp
)
add_library(armnnClBackendUnitTests OBJECT ${armnnClBackendUnitTests_sources})
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 66c2c2aa40..526dc68fc5 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,13 +6,15 @@
#include "ClContextControlFixture.hpp"
#include <backends/MemCopyWorkload.hpp>
+
+#include <backends/aclCommon/test/CreateWorkloadClNeon.hpp>
+
#include <backends/cl/ClTensorHandle.hpp>
#include <backends/cl/ClWorkloadFactory.hpp>
#include <backends/cl/workloads/ClWorkloads.hpp>
#include <backends/cl/workloads/ClWorkloadUtils.hpp>
-#include <backends/reference/RefWorkloadFactory.hpp>
-#include <test/CreateWorkloadClNeon.hpp>
+#include <backends/reference/RefWorkloadFactory.hpp>
boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
std::initializer_list<unsigned int> expectedDimensions)
diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp
new file mode 100644
index 0000000000..af8a36d6c0
--- /dev/null
+++ b/src/backends/cl/test/ClMemCopyTests.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <backends/cl/ClWorkloadFactory.hpp>
+#include <backends/reference/RefWorkloadFactory.hpp>
+
+#include <backends/test/MemCopyTestImpl.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(ClMemCopy)
+
+BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu)
+{
+ LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false);
+ BOOST_TEST(CompareTensors(result.output, result.outputExpected));
+}
+
+BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu)
+{
+ LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false);
+ BOOST_TEST(CompareTensors(result.output, result.outputExpected));
+}
+
+BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors)
+{
+ LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true);
+ BOOST_TEST(CompareTensors(result.output, result.outputExpected));
+}
+
+BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors)
+{
+ LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true);
+ BOOST_TEST(CompareTensors(result.output, result.outputExpected));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp
new file mode 100644
index 0000000000..90bef3647b
--- /dev/null
+++ b/src/backends/cl/test/Fp16SupportTest.cpp
@@ -0,0 +1,112 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/Descriptors.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/INetwork.hpp>
+#include <armnnUtils/Half.hpp>
+
+#include <Graph.hpp>
+#include <Optimizer.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/test/QuantizeHelper.hpp>
+
+#include <boost/core/ignore_unused.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <set>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Fp16Support)
+
+BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport)
+{
+ Graph graph;
+
+ Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1");
+ Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2");
+
+ Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition");
+ Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+ TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16);
+ inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+ additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+ inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+ additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+
+ BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+ BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+ BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+BOOST_AUTO_TEST_CASE(Fp16AdditionTest)
+{
+ using namespace half_float::literal;
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
+ IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
+ IConnectableLayer* additionLayer = net->AddAdditionLayer();
+ IConnectableLayer* outputLayer = net->AddOutputLayer(0);
+
+ inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+ additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ //change to float16
+ TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16);
+ inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+ inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+ additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+
+ // optimize the network
+ std::vector<Compute> backends = {Compute::GpuAcc};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ std::vector<Half> input1Data
+ {
+ 1.0_h, 2.0_h, 3.0_h, 4.0_h
+ };
+
+ std::vector<Half> input2Data
+ {
+ 100.0_h, 200.0_h, 300.0_h, 400.0_h
+ };
+
+ InputTensors inputTensors
+ {
+ {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
+ {1,ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
+ };
+
+ std::vector<Half> outputData(input1Data.size());
+ OutputTensors outputTensors
+ {
+ {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // Does the inference.
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Checks the results.
+ BOOST_TEST(outputData == std::vector<Half>({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp
new file mode 100644
index 0000000000..70ceac2a3f
--- /dev/null
+++ b/src/backends/cl/test/OpenClTimerTest.cpp
@@ -0,0 +1,143 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7
+
+#include <armnn/test/TensorHelpers.hpp>
+
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
+
+#include <backends/cl/ClContextControl.hpp>
+#include <backends/cl/ClWorkloadFactory.hpp>
+#include <backends/cl/OpenClTimer.hpp>
+
+#include <backends/test/TensorCopyUtils.hpp>
+#include <backends/test/WorkloadTestUtils.hpp>
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <boost/format.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <iostream>
+
+using namespace armnn;
+
+struct OpenClFixture
+{
+ // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
+ // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
+ // times from OpenClTimer.
+ OpenClFixture() : m_ClContextControl(nullptr, true) {}
+ ~OpenClFixture() {}
+
+ ClContextControl m_ClContextControl;
+};
+
+BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture)
+using FactoryType = ClWorkloadFactory;
+
+BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm)
+{
+ ClWorkloadFactory workloadFactory;
+
+ const unsigned int width = 2;
+ const unsigned int height = 3;
+ const unsigned int channels = 2;
+ const unsigned int num = 1;
+ int32_t qOffset = 0;
+ float qScale = 0.f;
+
+ TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType<float>());
+ TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType<float>());
+ TensorInfo tensorInfo({channels}, GetDataType<float>());
+
+ // Set quantization parameters if the requested type is a quantized type.
+ if(IsQuantizedType<float>())
+ {
+ inputTensorInfo.SetQuantizationScale(qScale);
+ inputTensorInfo.SetQuantizationOffset(qOffset);
+ outputTensorInfo.SetQuantizationScale(qScale);
+ outputTensorInfo.SetQuantizationOffset(qOffset);
+ tensorInfo.SetQuantizationScale(qScale);
+ tensorInfo.SetQuantizationOffset(qOffset);
+ }
+
+ auto input = MakeTensor<float, 4>(inputTensorInfo,
+ QuantizedVector<float>(qScale, qOffset,
+ {
+ 1.f, 4.f,
+ 4.f, 2.f,
+ 1.f, 6.f,
+
+ 1.f, 1.f,
+ 4.f, 1.f,
+ -2.f, 4.f
+ }));
+ // these values are per-channel of the input
+ auto mean = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, -2}));
+ auto variance = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {4, 9}));
+ auto beta = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, 2}));
+ auto gamma = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {2, 1}));
+
+ std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ BatchNormalizationQueueDescriptor data;
+ WorkloadInfo info;
+ ScopedCpuTensorHandle meanTensor(tensorInfo);
+ ScopedCpuTensorHandle varianceTensor(tensorInfo);
+ ScopedCpuTensorHandle betaTensor(tensorInfo);
+ ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+ AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+ AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+ AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+ AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+ AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+ data.m_Mean = &meanTensor;
+ data.m_Variance = &varianceTensor;
+ data.m_Beta = &betaTensor;
+ data.m_Gamma = &gammaTensor;
+ data.m_Parameters.m_Eps = 0.0f;
+
+ // for each channel:
+ // subtract mean, divide by standard deviation (with an epsilon to avoid div by 0)
+ // multiply by gamma and add beta
+ std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+ OpenClTimer openClTimer;
+
+ BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer");
+
+ //Start the timer
+ openClTimer.Start();
+
+ //Execute the workload
+ workload->Execute();
+
+ //Stop the timer
+ openClTimer.Stop();
+
+ BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1);
+
+ BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name,
+ "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]");
+
+ BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0);
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif //aarch64 or x86_64
\ No newline at end of file