ArmNN
 21.02
NeonWorkloadFactory.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonBackendId.hpp"
8 #include "NeonTensorHandle.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Utils.hpp>
17 
22 
25 
26 namespace armnn
27 {
28 
namespace
{
// File-local identifier for the Neon backend, built once from NeonBackendId().
static const BackendId s_Id{NeonBackendId()};
}
33 
35  Optional<DataType> dataType,
36  std::string& outReasonIfUnsupported)
37 {
38  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
39 }
40 
42  Optional<DataType> dataType,
43  std::string& outReasonIfUnsupported,
44  const ModelOptions& modelOptions)
45 {
46  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
47 }
48 
50 {
51  return s_Id;
52 }
53 
54 void NeonWorkloadFactory::SetNumberOfThreads()
55 {
56  if (m_ModelContextPtr)
57  {
58  const unsigned int MIN_THREADS = 1;
59  const unsigned int MAX_THREADS = 64;
60 
61  // Set the number of threads to be used if the user has set NumberOfThreads param
62  // Only set if within limit or valid input
63  auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
64  auto numberOfThreads = modelOptions->GetNumberOfThreads();
65 
66  if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
67  {
68  arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
69  }
70  }
71 }
72 
// Constructs a factory with no backend-specific model context; thread-count
// configuration in SetNumberOfThreads() becomes a no-op in that case.
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    SetNumberOfThreads();
}
78 
79 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
81  : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
82 {
83  SetNumberOfThreads();
84 }
85 
86 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
87  TensorShape const& subTensorShape,
88  unsigned int const* subTensorOrigin) const
89 {
90  const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
91 
93  coords.set_num_dimensions(subTensorShape.GetNumDimensions());
94  for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
95  {
96  // Arm compute indexes tensor coords in reverse order.
97  unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
98  coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
99  }
100 
101  const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
102  if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
103  {
104  return nullptr;
105  }
106 
107  return std::make_unique<NeonSubTensorHandle>(
108  PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
109 }
110 
111 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
112  const bool IsMemoryManaged) const
113 {
114  auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
115  if (IsMemoryManaged)
116  {
117  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
118  }
119  return tensorHandle;
120 }
121 
122 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
123  DataLayout dataLayout,
124  const bool IsMemoryManaged) const
125 {
126  auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
127  if (IsMemoryManaged)
128  {
129  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
130  }
131  return tensorHandle;
132 }
133 
134 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
135  const WorkloadInfo& info) const
136 {
137  IgnoreUnused(descriptor);
138 
139  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
140  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs);
141 
142  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
143 }
144 
145 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
146  const WorkloadInfo& info) const
147 {
148  return std::make_unique<NeonActivationWorkload>(descriptor, info);
149 }
150 
151 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
152  const WorkloadInfo& info) const
153 {
154  return std::make_unique<NeonAdditionWorkload>(descriptor, info);
155 }
156 
157 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
158  const WorkloadInfo& info) const
159 {
160  return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
161 }
162 
163 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
164  const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
165 {
166  return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
167 }
168 
170  const WorkloadInfo& info) const
171 {
172  return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
173 }
174 
175 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
176  const WorkloadInfo& info) const
177 {
178  return std::make_unique<NeonComparisonWorkload>(descriptor, info);
179 }
180 
181 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
182  const WorkloadInfo& info) const
183 {
184  return std::make_unique<NeonConcatWorkload>(descriptor, info);
185 }
186 
187 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
188  const WorkloadInfo& info) const
189 {
190  return std::make_unique<NeonConstantWorkload>(descriptor, info);
191 }
192 
194  const ConvertBf16ToFp32QueueDescriptor& descriptor,
195  const WorkloadInfo& info) const
196 {
197  return std::make_unique<NeonConvertBf16ToFp32Workload>(descriptor, info);
198 }
199 
201  const ConvertFp16ToFp32QueueDescriptor& descriptor,
202  const WorkloadInfo& info) const
203 {
204  return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
205 }
206 
208  const ConvertFp32ToBf16QueueDescriptor& descriptor,
209  const WorkloadInfo& info) const
210 {
211  return std::make_unique<NeonConvertFp32ToBf16Workload>(descriptor, info);
212 }
213 
215  const ConvertFp32ToFp16QueueDescriptor& descriptor,
216  const WorkloadInfo& info) const
217 {
218  return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
219 }
220 
221 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
222  const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
223 {
224  bool isFastMathEnabled = false;
225  if (m_ModelContextPtr)
226  {
227  if (m_ModelContextPtr.get() != nullptr)
228  {
229  auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
230  if (modelOptions)
231  {
232  isFastMathEnabled = modelOptions->IsFastMathEnabled();
233  }
234  }
235  }
236  return std::make_unique<NeonConvolution2dWorkload>(descriptor,
237  info,
238  m_MemoryManager->GetIntraLayerManager(),
239  isFastMathEnabled);
240 }
241 
242 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
243  const WorkloadInfo& info) const
244 {
245  return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
246 }
247 
248 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
249  const WorkloadInfo& info) const
250 {
251  return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
252 }
253 
255  const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
256 {
257  return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
258 }
259 
260 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
261  const WorkloadInfo& info) const
262 {
263  return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
264 }
265 
268 {
269  return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
270 }
271 
272 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
273  const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
274 {
275  return std::make_unique<NeonDivisionWorkload>(descriptor, info);
276 }
277 
279  const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
280 {
281  switch(descriptor.m_Parameters.m_Operation)
282  {
283  case UnaryOperation::Abs:
284  {
285  AbsQueueDescriptor absQueueDescriptor;
286  absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
287  absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
288 
289  return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
290  }
292  {
293  RsqrtQueueDescriptor rsqrtQueueDescriptor;
294  rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
295  rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
296 
297  return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
298  }
299  case UnaryOperation::Neg:
300  return std::make_unique<NeonNegWorkload>(descriptor, info);
301  case UnaryOperation::Exp:
302  return std::make_unique<NeonExpWorkload>(descriptor, info);
304  return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
305  default:
306  return nullptr;
307  }
308 }
309 
310 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
311  const WorkloadInfo& info) const
312 {
313  IgnoreUnused(descriptor);
314 
315  ComparisonQueueDescriptor comparisonDescriptor;
317 
318  return CreateComparison(comparisonDescriptor, info);
319 }
320 
321 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
322  const WorkloadInfo& info) const
323 {
324  return std::make_unique<NeonFillWorkload>(descriptor, info);
325 }
326 
327 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
328  const WorkloadInfo& info) const
329 {
330  return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
331 }
332 
333 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
334  const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
335 {
336  return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
337 }
338 
339 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
340  const armnn::WorkloadInfo& info) const
341 {
342  return std::make_unique<NeonGatherWorkload>(descriptor, info);
343 }
344 
345 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
346  const WorkloadInfo& info) const
347 {
348  IgnoreUnused(descriptor);
349 
350  ComparisonQueueDescriptor comparisonDescriptor;
352 
353  return CreateComparison(comparisonDescriptor, info);
354 }
355 
356 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
357  const WorkloadInfo& info) const
358 {
359  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
360 }
361 
363  const InstanceNormalizationQueueDescriptor& descriptor,
364  const WorkloadInfo& info) const
365 {
366  return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
367 }
368 
370  const WorkloadInfo& info) const
371 {
372  return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
373  m_MemoryManager->GetIntraLayerManager());
374 }
375 
376 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
377  const WorkloadInfo& info) const
378 {
379  return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
380 }
381 
382 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
383  const WorkloadInfo& info) const
384 {
385  switch(descriptor.m_Parameters.m_Operation)
386  {
388  return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
390  return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
391  default:
392  return nullptr;
393  }
394 }
395 
396 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
397  const WorkloadInfo& info) const
398 {
399  return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
400 }
401 
402 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
403  const WorkloadInfo& info) const
404 {
405  return std::make_unique<NeonMaximumWorkload>(descriptor, info);
406 }
407 
408 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
409  const WorkloadInfo& info) const
410 {
411  return std::make_unique<NeonMeanWorkload>(descriptor, info);
412 }
413 
414 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
415  const WorkloadInfo& info) const
416 {
417  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
418  {
419  throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
420  }
421 
422  return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
423 }
424 
425 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
426  const WorkloadInfo& info) const
427 {
428  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
429  {
430  throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
431  }
432 
433  return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
434 }
435 
436 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
437  const WorkloadInfo& info) const
438 {
439  return CreateConcat(descriptor, info);
440 }
441 
442 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
443  const WorkloadInfo& info) const
444 {
445  return std::make_unique<NeonMinimumWorkload>(descriptor, info);
446 }
447 
448 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
449  const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
450 {
451  return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
452 }
453 
454 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
455  const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
456 {
457  return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
458  m_MemoryManager->GetIntraLayerManager());
459 }
460 
461 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
462  const WorkloadInfo& info) const
463 {
464  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
465 }
466 
467 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
468  const WorkloadInfo& info) const
469 {
470  return std::make_unique<NeonPadWorkload>(descriptor, info);
471 }
472 
473 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
474  const WorkloadInfo& info) const
475 {
476  return std::make_unique<NeonPermuteWorkload>(descriptor, info);
477 }
478 
479 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
480  const WorkloadInfo& info) const
481 {
482  return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
483 }
484 
485 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
486  const WorkloadInfo& info) const
487 {
488  return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
489 }
490 
491 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor &descriptor,
492  const armnn::WorkloadInfo &info) const
493 {
494  return std::make_unique<NeonPreluWorkload>(descriptor, info);
495 }
496 
497 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
498  const WorkloadInfo& info) const
499 {
500  return std::make_unique<NeonQLstmWorkload>(descriptor, info);
501 }
502 
503 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
504  const WorkloadInfo& info) const
505 {
506  return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
507 }
508 
509 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
510  const WorkloadInfo& info) const
511 {
512  return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
513 }
514 
515 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
516  const WorkloadInfo& info) const
517 {
518  return std::make_unique<NeonRankWorkload>(descriptor, info);
519 }
520 
521 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
522  const WorkloadInfo& info) const
523 {
524  return std::make_unique<NeonReduceWorkload>(descriptor, info);
525 }
526 
527 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
528  const WorkloadInfo& info) const
529 {
530  return std::make_unique<NeonReshapeWorkload>(descriptor, info);
531 }
532 
533 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
534  const WorkloadInfo& info) const
535 {
536  return std::make_unique<NeonResizeWorkload>(descriptor, info);
537 }
538 
539 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
540  const ResizeBilinearQueueDescriptor& descriptor,
541  const WorkloadInfo& info) const
542 {
543  ResizeQueueDescriptor resizeDescriptor;
544  resizeDescriptor.m_Inputs = descriptor.m_Inputs;
545  resizeDescriptor.m_Outputs = descriptor.m_Outputs;
546 
547  resizeDescriptor.m_Parameters.m_DataLayout = descriptor.m_Parameters.m_DataLayout;
548  resizeDescriptor.m_Parameters.m_TargetWidth = descriptor.m_Parameters.m_TargetWidth;
549  resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;
550 
551  return CreateResize(resizeDescriptor, info);
552 }
553 
554 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor &descriptor,
555  const WorkloadInfo &info) const
556 {
557  IgnoreUnused(descriptor);
558 
559  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
560  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt);
561 
562  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
563 }
564 
565 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
566  const WorkloadInfo& info) const
567 {
568  return std::make_unique<NeonSliceWorkload>(descriptor, info);
569 }
570 
571 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
572  const WorkloadInfo& info) const
573 {
574  return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
575 }
576 
578  const WorkloadInfo& info) const
579 {
580  return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
581 }
582 
583 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
584  const WorkloadInfo& info) const
585 {
586  return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
587 }
588 
589 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
590  const WorkloadInfo& info) const
591 {
592  return std::make_unique<NeonSplitterWorkload>(descriptor, info);
593 }
594 
595 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
596  const WorkloadInfo& info) const
597 {
598  return std::make_unique<NeonStackWorkload>(descriptor, info);
599 }
600 
601 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
602  const WorkloadInfo& info) const
603 {
604  return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
605 }
606 
607 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
608  const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
609 {
610  return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
611 }
612 
613 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
614  const WorkloadInfo& info) const
615 {
616  return std::make_unique<NeonTransposeWorkload>(descriptor, info);
617 }
618 
620  const TransposeConvolution2dQueueDescriptor &descriptor,
621  const WorkloadInfo &info) const
622 {
623  return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
624  m_MemoryManager->GetIntraLayerManager());
625 }
626 
627 } // namespace armnn
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:62
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout
Definition: Types.hpp:50
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ComparisonDescriptor for the ComparisonLayer.
Definition: Descriptors.hpp:78
uint32_t m_TargetWidth
Target width value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
constexpr const char * NeonBackendId()
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
NeonWorkloadFactory(const std::shared_ptr< NeonMemoryManager > &memoryManager)
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
const BackendId & GetBackendId() const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFill(const FillQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetHeight
Target height value.
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLogicalBinary(const LogicalBinaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
Definition: Descriptors.hpp:98
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::unique_ptr< IWorkload > CreateReduce(const ReduceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQLstm(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
Contains information about inputs and outputs to a layer.
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateConvertBf16ToFp32(const ConvertBf16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override