ArmNN 22.02
DefaultAsyncExecuteTest.cpp
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <armnn/Exceptions.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/Workload.hpp>

#include <doctest/doctest.h>

#include <thread>

using namespace armnn;

namespace
{

TEST_SUITE("WorkloadAsyncExecuteTests")
{

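// Workload0 overrides both Execute (element-wise multiply) and ExecuteAsync
// (element-wise add), so the tests can tell from the resulting values which
// execution path actually ran.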
struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
    {
    }

    void Execute() const override
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }

    void ExecuteAsync(WorkingMemDescriptor& desc) override
    {
        int* inVals = static_cast<int*>(desc.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(desc.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < desc.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] + outVals[i];
            inVals[i] = outVals[i];
        }
    }

    QueueDescriptor* GetQueueDescriptor()
    {
        return &m_Data;
    }
};

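// Workload1 only implements Execute; it deliberately leaves ExecuteAsync to the
// default BaseWorkload implementation.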
struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    void Execute() const override
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }
};

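// Checks that every element of the mapped tensor equals the expected value.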
void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
{
    int* actualOutput = static_cast<int*>(tensorHandle->Map());

    bool allValuesCorrect = true;
    for (unsigned int i = 0;
         i < tensorHandle->GetShape().GetNumElements();
         ++i)
    {
        if (actualOutput[i] != expectedValue)
        {
            allValuesCorrect = false;
        }
    }

    CHECK(allValuesCorrect);
}

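// Builds a workload of the given type around a single input and output tensor handle.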
template<typename Workload>
std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
{
    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos = std::vector<TensorInfo>{info};
    workloadInfo.m_OutputTensorInfos = std::vector<TensorInfo>{info};

    ElementwiseUnaryQueueDescriptor elementwiseUnaryQueueDescriptor;
    elementwiseUnaryQueueDescriptor.m_Inputs = std::vector<ITensorHandle*>{inputTensor};
    elementwiseUnaryQueueDescriptor.m_Outputs = std::vector<ITensorHandle*>{outputTensor};

    return std::make_unique<Workload>(elementwiseUnaryQueueDescriptor, workloadInfo);
}

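// Runs Workload0 through both paths on separate tensors: Execute multiplies
// elements in place while the ExecuteAsync override adds them, so each path
// leaves a distinct value behind.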
TEST_CASE("TestAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    int inVals[5]{2, 2, 2, 2, 2};
    int outVals[5]{1, 1, 1, 1, 1};

    // Execute: 2 * 1 = 2; ExecuteAsync override: 2 + 1 = 3
    int expectedExecuteval = 2;
    int expectedExecuteAsyncval = 3;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);

    ScopedTensorHandle syncInput0(constInputTensor);
    ScopedTensorHandle syncOutput0(constOutputTensor);

    std::unique_ptr<Workload0> workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);

    workload0->Execute();

    ScopedTensorHandle asyncInput0(constInputTensor);
    ScopedTensorHandle asyncOutput0(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor0;
    workingMemDescriptor0.m_Inputs = std::vector<ITensorHandle*>{&asyncInput0};
    workingMemDescriptor0.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput0};

    workload0->ExecuteAsync(workingMemDescriptor0);

    // Inputs are also changed by the Execute/ExecuteAsync calls to make sure there is no interference with them
    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncval);
    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncval);

    ValidateTensor(&workload0->GetQueueDescriptor()->m_Outputs[0][0], expectedExecuteval);
    ValidateTensor(&workload0->GetQueueDescriptor()->m_Inputs[0][0], expectedExecuteval);
}

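// Workload1 provides no ExecuteAsync override, so this exercises the BaseWorkload fallback.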
TEST_CASE("TestDefaultAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals{2, 2, 2, 2, 2};
    std::vector<int> outVals{1, 1, 1, 1, 1};
    std::vector<int> defaultVals{0, 0, 0, 0, 0};

    int expectedExecuteval = 2;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);
    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput(defaultTensor);
    ScopedTensorHandle defaultOutput(defaultTensor);

    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput(constInputTensor);
    ScopedTensorHandle asyncOutput(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor;
    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};

    workload1->ExecuteAsync(workingMemDescriptor);

    // Workload1 has no ExecuteAsync implementation and so should use the default workload
    // ExecuteAsync implementation, which will call workload1's Execute() in a thread-safe manner
    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteval);
    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteval);
}
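
// For reference, the BaseWorkload fallback that this test exercises looks roughly
// like the following (a sketch, not the verbatim armnn implementation; it assumes
// a mutex member guarding m_Data):
//
//     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override
//     {
//         std::lock_guard<std::mutex> lockGuard(m_AsyncWorkloadMutex);
//         m_Data.m_Inputs = workingMemDescriptor.m_Inputs;
//         m_Data.m_Outputs = workingMemDescriptor.m_Outputs;
//         Execute();
//     }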

TEST_CASE("TestDefaultAsyncExecuteWithThreads")
{
    // Use a large vector so the threads have a chance to interact
    unsigned int vecSize = 1000;
    TensorInfo info({vecSize}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals1(vecSize, 2);
    std::vector<int> outVals1(vecSize, 1);
    std::vector<int> inVals2(vecSize, 5);
    std::vector<int> outVals2(vecSize, -1);

    std::vector<int> defaultVals(vecSize, 0);

    // Each thread runs Execute twice: thread 1 computes 2*1 = 2 then 2*2 = 4,
    // thread 2 computes 5*-1 = -5 then -5*-5 = 25
    int expectedExecuteval1 = 4;
    int expectedExecuteval2 = 25;

    ConstTensor constInputTensor1(info, inVals1);
    ConstTensor constOutputTensor1(info, outVals1);

    ConstTensor constInputTensor2(info, inVals2);
    ConstTensor constOutputTensor2(info, outVals2);

    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput(defaultTensor);
    ScopedTensorHandle defaultOutput(defaultTensor);
    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput1(constInputTensor1);
    ScopedTensorHandle asyncOutput1(constOutputTensor1);

    WorkingMemDescriptor workingMemDescriptor1;
    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};

    ScopedTensorHandle asyncInput2(constInputTensor2);
    ScopedTensorHandle asyncOutput2(constOutputTensor2);

    WorkingMemDescriptor workingMemDescriptor2;
    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};

    std::thread thread1 = std::thread([&]()
    {
        workload->ExecuteAsync(workingMemDescriptor1);
        workload->ExecuteAsync(workingMemDescriptor1);
    });

    std::thread thread2 = std::thread([&]()
    {
        workload->ExecuteAsync(workingMemDescriptor2);
        workload->ExecuteAsync(workingMemDescriptor2);
    });

    thread1.join();
    thread2.join();

    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteval1);
    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteval1);

    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteval2);
    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteval2);
}

} // TEST_SUITE("WorkloadAsyncExecuteTests")

} // anonymous namespace
Referenced symbols:

armnn: Copyright (c) 2021 ARM Limited and Contributors.
TensorShape::GetNumElements: unsigned int GetNumElements() const. Calculates the number of tensor elements by multiplying all specified dimension sizes. Definition: Tensor.cpp:181
ConstTensor: a tensor defined by a TensorInfo (shape and data type) and an immutable backing store. Definition: Tensor.hpp:327
ITensorHandle::GetShape: virtual TensorShape GetShape() const = 0. Gets the number of elements for each dimension, ordered from slowest iterating dimension to fastest iterating dimension.
ITensorHandle::Map: virtual const void* Map(bool blocking = true) const = 0. Maps the tensor data for access.
WorkloadInfo: contains information about the TensorInfos of a layer. Members: std::vector<TensorInfo> m_InputTensorInfos, std::vector<TensorInfo> m_OutputTensorInfos.
QueueDescriptor: members std::vector<ITensorHandle*> m_Inputs, std::vector<ITensorHandle*> m_Outputs.
CreateWorkload: std::unique_ptr<armnn::IWorkload> CreateWorkload(const armnn::IWorkloadFactory& workloadFactory, const armnn::WorkloadInfo& info, const DescriptorType& descriptor)