ArmNN 21.11
StridedSliceAsyncEndToEndTest.hpp
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <ResolveType.hpp>

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/Threadpool.hpp>
#include <armnn/IWorkingMemHandle.hpp>

#include <AsyncExecutionCallback.hpp>
#include <CommonTestUtils.hpp>

#include <doctest/doctest.h>

#include <vector>

namespace armnn
{

namespace experimental
{

template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void AsyncThreadedEndToEndTestImpl(INetworkPtr network,
                                   const std::vector<std::map<int, std::vector<TInput>>>& inputTensorData,
                                   const std::vector<std::map<int, std::vector<TOutput>>>& expectedOutputData,
                                   std::vector<BackendId> backends,
                                   const size_t numberOfInferences,
                                   float tolerance = 0.000001f)
{
    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load the network into the runtime with asynchronous execution enabled
    NetworkId networkId = 0;
    std::string errorMessage;
    INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

    std::vector<InputTensors> inputTensorsVec;
    std::vector<OutputTensors> outputTensorsVec;
    std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec;
    std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles;

    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        InputTensors inputTensors;
        OutputTensors outputTensors;
        outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>());

        inputTensors.reserve(inputTensorData.size());
        for (auto&& it : inputTensorData[i])
        {
            TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, it.first);
            inputTensorInfo.SetConstant(true);
            inputTensors.push_back({it.first,
                                    ConstTensor(inputTensorInfo, it.second.data())});
        }

        outputTensors.reserve(expectedOutputData.size());
        for (auto&& it : expectedOutputData[i])
        {
            std::vector<TOutput> out(it.second.size());
            outputStorageVec[i].emplace(it.first, out);
            outputTensors.push_back({it.first,
                                     Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
                                            outputStorageVec[i].at(it.first).data())});
        }

        inputTensorsVec.push_back(inputTensors);
        outputTensorsVec.push_back(outputTensors);

        workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId));
    }

    std::vector<std::thread> threads;
    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        // Access the vectors before we do anything multi-threaded
        InputTensors& inputTensors = inputTensorsVec[i];
        OutputTensors& outputTensors = outputTensorsVec[i];
        IWorkingMemHandle& workingMemHandle = *workingMemHandles[i].get();

        threads.emplace_back([&]()
        {
            // Run the async network
            runtime->Execute(workingMemHandle, inputTensors, outputTensors);
        });
    }

    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        threads[i].join();
    }

    // Check the results
    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        for (auto&& it : expectedOutputData[i])
        {
            std::vector<TOutput> out = outputStorageVec[i].at(it.first);
            for (unsigned int j = 0; j < out.size(); ++j)
            {
                CHECK(Compare<ArmnnOType>(it.second[j], out[j], tolerance) == true);
            }
        }
    }
}
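
// A minimal usage sketch (illustrative, not part of the original header): each
// element of inputTensorData and expectedOutputData describes one inference,
// keyed by input/output binding id. Two inferences on a single-input,
// single-output network bound to id 0 could be driven like this, assuming a
// CpuRef build; `net`, `in0`, `in1`, `ref0` and `ref1` are placeholders:
//
//     std::vector<std::map<int, std::vector<float>>> ins  {{{0, in0}},  {{0, in1}}};
//     std::vector<std::map<int, std::vector<float>>> outs {{{0, ref0}}, {{0, ref1}}};
//     AsyncThreadedEndToEndTestImpl<DataType::Float32, DataType::Float32>(
//         std::move(net), ins, outs, {Compute::CpuRef}, 2);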

template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void AsyncEndToEndTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f,
                           size_t numThreads = 1)
{
    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load the network into the runtime with asynchronous execution enabled
    NetworkId networkId = 0;
    std::string errorMessage;
    INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }

    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    if (numThreads <= 1)
    {
        // Create a WorkingMemHandle for this async network
        std::unique_ptr<IWorkingMemHandle> workingMemHandle = runtime->CreateWorkingMemHandle(networkId);
        IWorkingMemHandle& workingMemHandleRef = *workingMemHandle.get();

        // Run the async network on the calling thread
        runtime->Execute(workingMemHandleRef, inputTensors, outputTensors);
    }
    else
    {
        std::vector<std::shared_ptr<IWorkingMemHandle>> memHandles;

        for (size_t i = 0; i < numThreads; ++i)
        {
            memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
        }

        Threadpool threadpool(numThreads, runtime.get(), memHandles);
        AsyncCallbackManager callbackManager;

        // For asynchronous execution, we add a pool of working memory handles (one per thread) to the
        // LoadedNetwork, with each scheduled inference being given a specific priority
        for (size_t i = 0; i < 1000; ++i)
        {
            threadpool.Schedule(networkId,
                                inputTensors,
                                outputTensors,
                                static_cast<QosExecPriority>(rand() % 3),
                                callbackManager.GetNewCallback());
        }

        // Wait until each scheduled execution signals its callback
        for (size_t i = 0; i < 1000; ++i)
        {
            auto cb = callbackManager.GetNotifiedCallback();

            // Check the execution status
            CHECK(cb->GetStatus() == Status::Success);
        }
    }

    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out = outputStorage.at(it.first);

        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true);
        }
    }
}
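
// Usage sketch (illustrative, not from the original source): with the default
// numThreads == 1 the inference runs synchronously on the calling thread through
// a single working memory handle; any larger value schedules the same
// input/output pair on the Threadpool 1000 times with random priorities. A
// minimal call for a network bound to id 0 might look like this, assuming a
// CpuRef build; `net`, `inputValues` and `expectedValues` are placeholders:
//
//     std::map<int, std::vector<float>> in  {{0, inputValues}};
//     std::map<int, std::vector<float>> out {{0, expectedValues}};
//     AsyncEndToEndTestImpl<DataType::Float32, DataType::Float32>(
//         std::move(net), in, out, {Compute::CpuRef}, 0.000001f, 4);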

template<armnn::DataType DataType>
INetworkPtr CreateStridedSliceNetwork(const TensorShape& inputShape,
                                      const TensorShape& outputShape,
                                      const std::vector<int>& beginData,
                                      const std::vector<int>& endData,
                                      const std::vector<int>& stridesData,
                                      int beginMask = 0,
                                      int endMask = 0,
                                      int shrinkAxisMask = 0,
                                      int ellipsisMask = 0,
                                      int newAxisMask = 0,
                                      const float qScale = 1.0f,
                                      const int32_t qOffset = 0)
{
    using namespace armnn;
    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, DataType, qScale, qOffset);
    TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);

    armnn::StridedSliceDescriptor stridedSliceDescriptor;
    stridedSliceDescriptor.m_Begin = beginData;
    stridedSliceDescriptor.m_End = endData;
    stridedSliceDescriptor.m_Stride = stridesData;
    stridedSliceDescriptor.m_BeginMask = beginMask;
    stridedSliceDescriptor.m_EndMask = endMask;
    stridedSliceDescriptor.m_ShrinkAxisMask = shrinkAxisMask;
    stridedSliceDescriptor.m_EllipsisMask = ellipsisMask;
    stridedSliceDescriptor.m_NewAxisMask = newAxisMask;

    IConnectableLayer* input = net->AddInputLayer(0, "Input_Layer");
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(stridedSliceDescriptor, "splitter");
    IConnectableLayer* output = net->AddOutputLayer(0);

    Connect(input, stridedSlice, inputTensorInfo, 0, 0);
    Connect(stridedSlice, output, outputTensorInfo, 0, 0);

    return net;
}
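
// Worked example (added for clarity): for the parameters used by the tests below,
// input shape {3, 2, 3, 1} with begin {1, 0, 0, 0}, end {2, 2, 3, 1} and strides
// {1, 1, 1, 1}, each output dimension is ceil((end - begin) / stride), i.e.
// {2-1, 2-0, 3-0, 1-0} = {1, 2, 3, 1}. The slice therefore keeps only the middle
// of the three outermost {2, 3, 1} blocks of the input.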

template<armnn::DataType ArmnnType>
void StridedSlicedEndToEndTest(const std::vector<BackendId>& backends, size_t numThreads)
{
    using namespace armnn;
    using T = ResolveType<ArmnnType>;

    const TensorShape& inputShape = {3, 2, 3, 1};
    const TensorShape& outputShape = {1, 2, 3, 1};
    const std::vector<int>& beginData = {1, 0, 0, 0};
    const std::vector<int>& endData = {2, 2, 3, 1};
    const std::vector<int>& stridesData = {1, 1, 1, 1};
    int beginMask = 0;
    int endMask = 0;
    int shrinkAxisMask = 0;
    int ellipsisMask = 0;
    int newAxisMask = 0;

    // Build up the structure of the network
    INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape,
                                                           outputShape,
                                                           beginData,
                                                           endData,
                                                           stridesData,
                                                           beginMask,
                                                           endMask,
                                                           shrinkAxisMask,
                                                           ellipsisMask,
                                                           newAxisMask);

    CHECK(net);

    // Create structures for input and output
    std::vector<T> inputData{
        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,

        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected{
        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f
    };

    std::map<int, std::vector<T>> inputTensorData = {{0, inputData}};
    std::map<int, std::vector<T>> expectedOutputData = {{0, outputExpected}};

    AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(std::move(net),
                                                inputTensorData,
                                                expectedOutputData,
                                                backends,
                                                0.000001f,
                                                numThreads);
}
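
// Invocation sketch (the backend choice here is an assumption): numThreads == 1
// exercises the synchronous Execute() path, while larger values exercise the
// Threadpool scheduling path, e.g.:
//
//     StridedSlicedEndToEndTest<armnn::DataType::Float32>({armnn::Compute::CpuRef}, 1);
//     StridedSlicedEndToEndTest<armnn::DataType::Float32>({armnn::Compute::CpuRef}, 4);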

template<armnn::DataType ArmnnType>
void StridedSlicedMultiThreadedEndToEndTest(const std::vector<BackendId>& backends)
{
    using namespace armnn;
    using T = ResolveType<ArmnnType>;

    const TensorShape& inputShape = {3, 2, 3, 1};
    const TensorShape& outputShape = {1, 2, 3, 1};
    const std::vector<int>& beginData = {1, 0, 0, 0};
    const std::vector<int>& endData = {2, 2, 3, 1};
    const std::vector<int>& stridesData = {1, 1, 1, 1};
    int beginMask = 0;
    int endMask = 0;
    int shrinkAxisMask = 0;
    int ellipsisMask = 0;
    int newAxisMask = 0;

    // Build up the structure of the network
    INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape,
                                                           outputShape,
                                                           beginData,
                                                           endData,
                                                           stridesData,
                                                           beginMask,
                                                           endMask,
                                                           shrinkAxisMask,
                                                           ellipsisMask,
                                                           newAxisMask);

    CHECK(net);

    // Create structures for the first input and expected output
    std::vector<T> inputData1{
        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,

        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected1{ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f };

    // Create structures for the second input and expected output
    std::vector<T> inputData2{
        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

        8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f,

        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected2{ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f };

    std::vector<std::map<int, std::vector<T>>> inputTensors;
    std::vector<std::map<int, std::vector<T>>> outputTensors;

    inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData1}});
    inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData2}});
    outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected1}});
    outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected2}});

    AsyncThreadedEndToEndTestImpl<ArmnnType, ArmnnType>(std::move(net), inputTensors, outputTensors, backends, 2);
}

} // namespace experimental

} // namespace armnn
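
// Illustrative only: how a backend test suite might wire these helpers into a
// doctest case. The suite/case names and the CpuRef backend are assumptions,
// not part of this header.
//
// TEST_SUITE("StridedSliceAsyncEndToEnd")
// {
//     TEST_CASE("StridedSliceAsyncEndToEndFloat32CpuRef")
//     {
//         std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
//         armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(backends, 1);
//         armnn::experimental::StridedSlicedMultiThreadedEndToEndTest<armnn::DataType::Float32>(backends);
//     }
// }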