ArmNN 22.08
DefaultAsyncExecuteTest.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <armnn/Exceptions.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/Workload.hpp>

#include <doctest/doctest.h>

#include <thread>

using namespace armnn;


namespace
{

TEST_SUITE("WorkloadAsyncExecuteTests")
{

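// Test workload that overrides both Execute and ExecuteAsync with different arithmetic
// (multiply vs. add), so the tests below can tell which of the two paths actually ran.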
struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    // Default constructor (assumed); not used by the tests in this file.
    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
    {
    }

    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }

    void ExecuteAsync(ExecutionData& executionData)
    {
        WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
        int* inVals = static_cast<int*>(workingMemDescriptor->m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(workingMemDescriptor->m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < workingMemDescriptor->m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] + outVals[i];
            inVals[i] = outVals[i];
        }
    }

    QueueDescriptor* GetQueueDescriptor()
    {
        return &m_Data;
    }
};

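// Test workload that only overrides Execute; calling ExecuteAsync on it exercises the
// default implementation provided by the workload base class.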
struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }
};

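// Checks that every element of the mapped tensor equals the expected value.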
void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
{
    int* actualOutput = static_cast<int*>(tensorHandle->Map());

    bool allValuesCorrect = true;
    for (unsigned int i = 0;
         i < tensorHandle->GetShape().GetNumElements();
         ++i)
    {
        if (actualOutput[i] != expectedValue)
        {
            allValuesCorrect = false;
        }
    }

    CHECK(allValuesCorrect);
}

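// Builds an elementwise-unary workload of the requested type around a single input and a
// single output tensor handle.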
template<typename Workload>
std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
{
    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos = std::vector<TensorInfo>{info};
    workloadInfo.m_OutputTensorInfos = std::vector<TensorInfo>{info};

    ElementwiseUnaryQueueDescriptor elementwiseUnaryQueueDescriptor;
    elementwiseUnaryQueueDescriptor.m_Inputs = std::vector<ITensorHandle*>{inputTensor};
    elementwiseUnaryQueueDescriptor.m_Outputs = std::vector<ITensorHandle*>{outputTensor};

    return std::make_unique<Workload>(elementwiseUnaryQueueDescriptor, workloadInfo);
}

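// Runs Execute on one pair of handles and ExecuteAsync on another: with inputs of 2 and
// outputs of 1, the multiply path yields 2 and the add path yields 3.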
TEST_CASE("TestAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    int inVals[5]{2, 2, 2, 2, 2};
    int outVals[5]{1, 1, 1, 1, 1};

    int expectedExecuteval = 2;
    int expectedExecuteAsyncval = 3;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);

    ScopedTensorHandle syncInput0(constInputTensor);
    ScopedTensorHandle syncOutput0(constOutputTensor);

    std::unique_ptr<Workload0> workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);

    workload0.get()->Execute();

    ScopedTensorHandle asyncInput0(constInputTensor);
    ScopedTensorHandle asyncOutput0(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor0;
    workingMemDescriptor0.m_Inputs = std::vector<ITensorHandle*>{&asyncInput0};
    workingMemDescriptor0.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput0};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor0;

    workload0.get()->ExecuteAsync(executionData);

    // Inputs are also changed by the Execute/ExecuteAsync calls to make sure there is no interference with them
    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncval);
    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncval);

    ValidateTensor(&workload0.get()->GetQueueDescriptor()->m_Outputs[0][0], expectedExecuteval);
    ValidateTensor(&workload0.get()->GetQueueDescriptor()->m_Inputs[0][0], expectedExecuteval);
}

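// Exercises the default ExecuteAsync path: the workload is created over placeholder (default)
// handles, while the data to be processed lives in the working memory descriptor.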
TEST_CASE("TestDefaultAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals{2, 2, 2, 2, 2};
    std::vector<int> outVals{1, 1, 1, 1, 1};
    std::vector<int> defaultVals{0, 0, 0, 0, 0};

    int expectedExecuteval = 2;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);
    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput = ScopedTensorHandle(defaultTensor);
    ScopedTensorHandle defaultOutput = ScopedTensorHandle(defaultTensor);

    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput(constInputTensor);
    ScopedTensorHandle asyncOutput(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor;
    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor;

    workload1.get()->ExecuteAsync(executionData);

    // workload1 has no ExecuteAsync implementation, so the default implementation provided by
    // the workload base class is used, which calls workload1's Execute() in a thread-safe manner
    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteval);
    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteval);
}

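// Two threads share one workload and each call the default ExecuteAsync twice on their own
// working memory. Because Execute multiplies the input into the output and copies the result
// back into the input, two runs turn (in=2, out=1) into 4 and (in=5, out=-1) into 25.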
TEST_CASE("TestDefaultAsyncExecuteWithThreads")
{
    // Use a large vector so the threads have a chance to interact
    unsigned int vecSize = 1000;
    TensorInfo info({vecSize}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals1(vecSize, 2);
    std::vector<int> outVals1(vecSize, 1);
    std::vector<int> inVals2(vecSize, 5);
    std::vector<int> outVals2(vecSize, -1);

    std::vector<int> defaultVals(vecSize, 0);

    int expectedExecuteval1 = 4;
    int expectedExecuteval2 = 25;
    ConstTensor constInputTensor1(info, inVals1);
    ConstTensor constOutputTensor1(info, outVals1);

    ConstTensor constInputTensor2(info, inVals2);
    ConstTensor constOutputTensor2(info, outVals2);

    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput = ScopedTensorHandle(defaultTensor);
    ScopedTensorHandle defaultOutput = ScopedTensorHandle(defaultTensor);
    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput1(constInputTensor1);
    ScopedTensorHandle asyncOutput1(constOutputTensor1);

    WorkingMemDescriptor workingMemDescriptor1;
    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};

    ExecutionData executionData1;
    executionData1.m_Data = &workingMemDescriptor1;

    ScopedTensorHandle asyncInput2(constInputTensor2);
    ScopedTensorHandle asyncOutput2(constOutputTensor2);

    WorkingMemDescriptor workingMemDescriptor2;
    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};

    ExecutionData executionData2;
    executionData2.m_Data = &workingMemDescriptor2;

    std::thread thread1 = std::thread([&]()
    {
        workload.get()->ExecuteAsync(executionData1);
        workload.get()->ExecuteAsync(executionData1);
    });

    std::thread thread2 = std::thread([&]()
    {
        workload.get()->ExecuteAsync(executionData2);
        workload.get()->ExecuteAsync(executionData2);
    });

    thread1.join();
    thread2.join();

    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteval1);
    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteval1);

    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteval2);
    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteval2);
}

}

}