aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Finn Williams <Finn.Williams@arm.com>, 2021-03-31 16:22:40 +0100
committer: Jim Flynn <jim.flynn@arm.com>, 2021-04-08 08:44:11 +0000
commit: b76eaed55a89330b3b448c4f4522b3fc94a4f38d (patch)
tree: a8270de0ad19f2121b217e93fd570214254df69b /src
parent: 30aa3713a21888dcbc640182a2fcf2d161348d62 (diff)
download: armnn-b76eaed55a89330b3b448c4f4522b3fc94a4f38d.tar.gz
IVGCVSW-5793 Add default ExecuteAsync implementation to Workload
Signed-off-by: Finn Williams <Finn.Williams@arm.com> Change-Id: If2069b4d274286e654ac2bceb52d147f9ee3a7a9
Diffstat (limited to 'src')
-rw-r--r-- src/backends/backendsCommon/Workload.hpp | 16
-rw-r--r-- src/backends/backendsCommon/test/CMakeLists.txt | 1
-rw-r--r-- src/backends/backendsCommon/test/DefaultAsyncExecuteTest.cpp | 249
3 files changed, 264 insertions, 2 deletions
diff --git a/src/backends/backendsCommon/Workload.hpp b/src/backends/backendsCommon/Workload.hpp
index 940b878d2f..87869c9841 100644
--- a/src/backends/backendsCommon/Workload.hpp
+++ b/src/backends/backendsCommon/Workload.hpp
@@ -37,7 +37,16 @@ public:
m_Data.Validate(info);
}
- void ExecuteAsync(WorkingMemDescriptor&) override {};
+    // Default asynchronous execution for workloads that do not provide their own ExecuteAsync.
+    //
+    // Copies the input and output handles from workingMemDescriptor into m_Data and then runs the
+    // synchronous Execute(). Both steps happen while m_AsyncWorkloadMutex is held, so calls made
+    // on the same workload object from several threads are executed one after another.
+    //
+    // workingMemDescriptor: supplies the m_Inputs / m_Outputs handles that Execute() will use.
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override
+    {
+        ARMNN_LOG(info) << "Using default async workload execution, this will affect network performance";
+        std::lock_guard<std::mutex> lockGuard(m_AsyncWorkloadMutex);
+
+        m_Data.m_Inputs = workingMemDescriptor.m_Inputs;
+        m_Data.m_Outputs = workingMemDescriptor.m_Outputs;
+
+        Execute();
+    }
void PostAllocationConfigure() override {}
@@ -46,8 +55,11 @@ public:
profiling::ProfilingGuid GetGuid() const final { return m_Guid; }
protected:
- const QueueDescriptor m_Data;
+ QueueDescriptor m_Data;
const profiling::ProfilingGuid m_Guid;
+
+private:
+ std::mutex m_AsyncWorkloadMutex;
};
// TypedWorkload used
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 9d36f52b59..248ada92a2 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -15,6 +15,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
ComparisonEndToEndTestImpl.hpp
DataLayoutUtils.hpp
DataTypeUtils.hpp
+ DefaultAsyncExecuteTest.cpp
DepthToSpaceEndToEndTestImpl.hpp
DequantizeEndToEndTestImpl.hpp
DetectionPostProcessEndToEndTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/DefaultAsyncExecuteTest.cpp b/src/backends/backendsCommon/test/DefaultAsyncExecuteTest.cpp
new file mode 100644
index 0000000000..0d4595210e
--- /dev/null
+++ b/src/backends/backendsCommon/test/DefaultAsyncExecuteTest.cpp
@@ -0,0 +1,249 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/Exceptions.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+#include <memory>
+#include <thread>
+#include <vector>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(WorkloadAsyncExecuteTests)
+
+namespace
+{
+
+// Test workload with distinct synchronous and asynchronous behaviour: Execute() multiplies and
+// ExecuteAsync() adds, so the resulting tensor values reveal which entry point was run.
+struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
+{
+    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+        : BaseWorkload(descriptor, info)
+    {
+    }
+
+    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
+    {
+    }
+
+    // Synchronous path on the first input/output handle held in m_Data:
+    // out[i] = in[i] * out[i], and the product is also written back to in[i].
+    // Both mapped buffers are accessed as int.
+    void Execute() const override
+    {
+        ITensorHandle& input  = *m_Data.m_Inputs[0];
+        ITensorHandle& output = *m_Data.m_Outputs[0];
+
+        int* inVals  = static_cast<int*>(input.Map());
+        int* outVals = static_cast<int*>(output.Map());
+
+        // The element count does not change inside the loop, so query it once
+        const unsigned int numElements = input.GetShape().GetNumElements();
+        for (unsigned int i = 0; i < numElements; ++i)
+        {
+            outVals[i] = inVals[i] * outVals[i];
+            inVals[i] = outVals[i];
+        }
+    }
+
+    // Asynchronous path on the first input/output handle of desc (m_Data is not touched):
+    // out[i] = in[i] + out[i], and the sum is also written back to in[i].
+    void ExecuteAsync(WorkingMemDescriptor& desc) override
+    {
+        ITensorHandle& input  = *desc.m_Inputs[0];
+        ITensorHandle& output = *desc.m_Outputs[0];
+
+        int* inVals  = static_cast<int*>(input.Map());
+        int* outVals = static_cast<int*>(output.Map());
+
+        const unsigned int numElements = input.GetShape().GetNumElements();
+        for (unsigned int i = 0; i < numElements; ++i)
+        {
+            outVals[i] = inVals[i] + outVals[i];
+            inVals[i] = outVals[i];
+        }
+    }
+
+    // Exposes the protected m_Data so a test can inspect the handles used by Execute()
+    QueueDescriptor* GetQueueDescriptor()
+    {
+        return &m_Data;
+    }
+};
+
+// Test workload that only implements Execute(); ExecuteAsync() is inherited from BaseWorkload.
+struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
+{
+    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+        : BaseWorkload(descriptor, info)
+    {
+    }
+
+    // Operates on the first input/output handle held in m_Data:
+    // out[i] = in[i] * out[i], and the product is also written back to in[i].
+    // Both mapped buffers are accessed as int.
+    void Execute() const override
+    {
+        ITensorHandle& input  = *m_Data.m_Inputs[0];
+        ITensorHandle& output = *m_Data.m_Outputs[0];
+
+        int* inVals  = static_cast<int*>(input.Map());
+        int* outVals = static_cast<int*>(output.Map());
+
+        // The element count does not change inside the loop, so query it once
+        const unsigned int numElements = input.GetShape().GetNumElements();
+        for (unsigned int i = 0; i < numElements; ++i)
+        {
+            outVals[i] = inVals[i] * outVals[i];
+            inVals[i] = outVals[i];
+        }
+    }
+};
+
+// Maps tensorHandle, reads its contents as int and reports one BOOST_CHECK result for the whole
+// tensor: the check passes only if every element equals expectedValue.
+void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
+{
+    int* mappedValues = static_cast<int*>(tensorHandle->Map());
+
+    bool allValuesCorrect = true;
+    unsigned int index = 0;
+    while (index < tensorHandle->GetShape().GetNumElements())
+    {
+        // Once a mismatch has been seen the flag stays false for the rest of the scan
+        allValuesCorrect = allValuesCorrect && (mappedValues[index] == expectedValue);
+        ++index;
+    }
+
+    BOOST_CHECK(allValuesCorrect);
+}
+
+// Builds a workload of type Workload with exactly one input and one output handle.
+// The same TensorInfo is used to describe both the input and the output tensor.
+template<typename Workload>
+std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
+{
+    ElementwiseUnaryQueueDescriptor queueDescriptor;
+    queueDescriptor.m_Inputs = {inputTensor};
+    queueDescriptor.m_Outputs = {outputTensor};
+
+    WorkloadInfo tensorInfos;
+    tensorInfos.m_InputTensorInfos = {info};
+    tensorInfos.m_OutputTensorInfos = {info};
+
+    return std::make_unique<Workload>(queueDescriptor, tensorInfos);
+}
+
+// Checks that a workload providing its own ExecuteAsync() uses it on the working memory
+// descriptor's tensors, while Execute() only affects the tensors in the queue descriptor.
+BOOST_AUTO_TEST_CASE(TestAsyncExecute)
+{
+    TensorInfo info({5}, DataType::Signed32);
+
+    int inVals[5]{2, 2, 2, 2, 2};
+    int outVals[5]{1, 1, 1, 1, 1};
+
+    // Workload0 multiplies in Execute() (2 * 1 = 2) and adds in ExecuteAsync() (2 + 1 = 3)
+    int expectedExecuteval = 2;
+    int expectedExecuteAsyncval = 3;
+
+    ConstTensor constInputTensor(info, inVals);
+    ConstTensor constOutputTensor(info, outVals);
+
+    // Handles referenced by the workload's queue descriptor, used by Execute()
+    ScopedCpuTensorHandle syncInput0(constInputTensor);
+    ScopedCpuTensorHandle syncOutput0(constOutputTensor);
+
+    auto workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);
+
+    workload0->Execute();
+
+    // Separate handles handed to ExecuteAsync() through the working memory descriptor
+    ScopedCpuTensorHandle asyncInput0(constInputTensor);
+    ScopedCpuTensorHandle asyncOutput0(constOutputTensor);
+
+    WorkingMemDescriptor workingMemDescriptor0;
+    workingMemDescriptor0.m_Inputs = {&asyncInput0};
+    workingMemDescriptor0.m_Outputs = {&asyncOutput0};
+
+    workload0->ExecuteAsync(workingMemDescriptor0);
+
+    // Inputs are also changed by the execute/executeAsync calls to make sure there is no interference with them
+    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncval);
+    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncval);
+
+    QueueDescriptor* queueDescriptor = workload0->GetQueueDescriptor();
+    ValidateTensor(queueDescriptor->m_Outputs[0], expectedExecuteval);
+    ValidateTensor(queueDescriptor->m_Inputs[0], expectedExecuteval);
+}
+
+// Checks that a workload without its own ExecuteAsync() falls back to the BaseWorkload
+// implementation, which runs Execute() on the tensors of the working memory descriptor.
+BOOST_AUTO_TEST_CASE(TestDefaultAsyncExecute)
+{
+    TensorInfo info({5}, DataType::Signed32);
+
+    std::vector<int> inVals{2, 2, 2, 2, 2};
+    std::vector<int> outVals{1, 1, 1, 1, 1};
+    std::vector<int> defaultVals{0, 0, 0, 0, 0};
+
+    // Workload1::Execute() multiplies: 2 * 1 = 2
+    int expectedExecuteval = 2;
+
+    ConstTensor constInputTensor(info, inVals);
+    ConstTensor constOutputTensor(info, outVals);
+    // Pass the vector itself, as for inVals/outVals, so the tensor refers to the element
+    // storage and not to the std::vector object
+    ConstTensor defaultTensor(info, defaultVals);
+
+    // Placeholder handles for the queue descriptor; ExecuteAsync() is expected to replace them
+    ScopedCpuTensorHandle defaultInput = ScopedCpuTensorHandle(defaultTensor);
+    ScopedCpuTensorHandle defaultOutput = ScopedCpuTensorHandle(defaultTensor);
+
+    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);
+
+    ScopedCpuTensorHandle asyncInput(constInputTensor);
+    ScopedCpuTensorHandle asyncOutput(constOutputTensor);
+
+    WorkingMemDescriptor workingMemDescriptor;
+    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
+    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};
+
+    workload1->ExecuteAsync(workingMemDescriptor);
+
+    // workload1 has no ExecuteAsync implementation and so should use the default workload ExecuteAsync
+    // implementation, which calls workload1.Execute() while holding a mutex
+    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteval);
+    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteval);
+}
+
+// Runs the default ExecuteAsync() of one shared workload from two threads, each with its own
+// working memory descriptor, and checks that neither thread's tensors are affected by the other.
+BOOST_AUTO_TEST_CASE(TestDefaultAsyncExeuteWithThreads)
+{
+    // Use a large vector so the threads have a chance to interact
+    unsigned int vecSize = 1000;
+    TensorInfo info({vecSize}, DataType::Signed32);
+
+    std::vector<int> inVals1(vecSize, 2);
+    std::vector<int> outVals1(vecSize, 1);
+    std::vector<int> inVals2(vecSize, 5);
+    std::vector<int> outVals2(vecSize, -1);
+
+    std::vector<int> defaultVals(vecSize, 0);
+
+    // Workload1::Execute() multiplies and writes the product back to the input, and each thread
+    // runs it twice: 2 * 1 = 2 then 2 * 2 = 4, and 5 * -1 = -5 then -5 * -5 = 25
+    int expectedExecuteval1 = 4;
+    int expectedExecuteval2 = 25;
+    ConstTensor constInputTensor1(info, inVals1);
+    ConstTensor constOutputTensor1(info, outVals1);
+
+    ConstTensor constInputTensor2(info, inVals2);
+    ConstTensor constOutputTensor2(info, outVals2);
+
+    // Pass the vector itself, as for the other tensors, so the tensor refers to the element
+    // storage; the address of the std::vector object is far smaller than the tensor describes
+    ConstTensor defaultTensor(info, defaultVals);
+
+    // Placeholder handles for the queue descriptor; ExecuteAsync() is expected to replace them
+    ScopedCpuTensorHandle defaultInput = ScopedCpuTensorHandle(defaultTensor);
+    ScopedCpuTensorHandle defaultOutput = ScopedCpuTensorHandle(defaultTensor);
+    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);
+
+    ScopedCpuTensorHandle asyncInput1(constInputTensor1);
+    ScopedCpuTensorHandle asyncOutput1(constOutputTensor1);
+
+    WorkingMemDescriptor workingMemDescriptor1;
+    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
+    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};
+
+    ScopedCpuTensorHandle asyncInput2(constInputTensor2);
+    ScopedCpuTensorHandle asyncOutput2(constOutputTensor2);
+
+    WorkingMemDescriptor workingMemDescriptor2;
+    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
+    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};
+
+    // Capturing by reference is safe here: both threads are joined before the locals go away
+    std::thread thread1([&]()
+    {
+        workload->ExecuteAsync(workingMemDescriptor1);
+        workload->ExecuteAsync(workingMemDescriptor1);
+    });
+
+    std::thread thread2([&]()
+    {
+        workload->ExecuteAsync(workingMemDescriptor2);
+        workload->ExecuteAsync(workingMemDescriptor2);
+    });
+
+    thread1.join();
+    thread2.join();
+
+    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteval1);
+    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteval1);
+
+    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteval2);
+    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteval2);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
+} \ No newline at end of file