ArmNN
 21.05
NeonWorkloadFactory.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonBackendId.hpp"
8 #include "NeonTensorHandle.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Utils.hpp>
17 
22 
25 
26 namespace armnn
27 {
28 
29 namespace
30 {
31 static const BackendId s_Id{NeonBackendId()};
32 }
33 
35  Optional<DataType> dataType,
36  std::string& outReasonIfUnsupported)
37 {
38  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
39 }
40 
42  Optional<DataType> dataType,
43  std::string& outReasonIfUnsupported,
44  const ModelOptions& modelOptions)
45 {
46  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
47 }
48 
50 {
51  return s_Id;
52 }
53 
54 void NeonWorkloadFactory::SetNumberOfThreads()
55 {
56  if (m_ModelContextPtr)
57  {
58  const unsigned int MIN_THREADS = 1;
59  const unsigned int MAX_THREADS = 64;
60 
61  // Set the number of threads to be used if the user has set NumberOfThreads param
62  // Only set if within limit or valid input
63  auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
64  auto numberOfThreads = modelOptions->GetNumberOfThreads();
65 
66  if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
67  {
68  arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
69  }
70  }
71 }
72 
// Constructs a factory without a backend-specific model context.
// SetNumberOfThreads() is still invoked, but it is a no-op when
// m_ModelContextPtr is null, so the scheduler default is kept.
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    SetNumberOfThreads();
}
78 
79 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
81  : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
82 {
83  SetNumberOfThreads();
84 }
85 
// Attempts to create a sub-tensor handle that views a window of `parent`.
// Returns nullptr when Arm Compute rejects the requested sub-tensor
// (error_on_invalid_subtensor), letting the caller fall back to a
// standalone tensor handle.
// NOTE(review): the declaration of `coords` (arm_compute::Coordinates) is not
// visible in this view of the file — confirm against the full source.
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
                                                                          TensorShape const& subTensorShape,
                                                                          unsigned int const* subTensorOrigin) const
{
    const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);

    coords.set_num_dimensions(subTensorShape.GetNumDimensions());
    for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
    {
        // Arm compute indexes tensor coords in reverse order.
        unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
        coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
    }

    const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
    if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
    {
        return nullptr;
    }

    return std::make_unique<NeonSubTensorHandle>(
        PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
}
110 
111 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
112  const bool IsMemoryManaged) const
113 {
114  auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
115  if (IsMemoryManaged)
116  {
117  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
118  }
119  return tensorHandle;
120 }
121 
122 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
123  DataLayout dataLayout,
124  const bool IsMemoryManaged) const
125 {
126  auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
127  if (IsMemoryManaged)
128  {
129  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
130  }
131  return tensorHandle;
132 }
133 
134 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
135  const WorkloadInfo& info) const
136 {
137  IgnoreUnused(descriptor);
138 
139  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
140  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs);
141 
142  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
143 }
144 
// Creates the Neon workload implementing the Activation layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonActivationWorkload>(descriptor, info);
}
150 
// Creates the Neon workload implementing the Addition layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonAdditionWorkload>(descriptor, info);
}
156 
// Creates the Neon workload implementing the ArgMinMax layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
}
162 
// Creates the Neon workload implementing the BatchNormalization layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
}
168 
170  const WorkloadInfo& info) const
171 {
172  return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
173 }
174 
// Creates the Neon workload implementing the Cast layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonCastWorkload>(descriptor, info);
}
180 
// Creates the Neon workload implementing the Comparison layer
// (also the delegation target of the deprecated Equal/Greater factories).
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonComparisonWorkload>(descriptor, info);
}
186 
// Creates the Neon workload implementing the Concat layer
// (also the delegation target of CreateMerger).
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConcatWorkload>(descriptor, info);
}
192 
// Creates the Neon workload implementing the Constant layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonConstantWorkload>(descriptor, info);
}
198 
200  const ConvertBf16ToFp32QueueDescriptor& descriptor,
201  const WorkloadInfo& info) const
202 {
203  return std::make_unique<NeonConvertBf16ToFp32Workload>(descriptor, info);
204 }
205 
207  const ConvertFp16ToFp32QueueDescriptor& descriptor,
208  const WorkloadInfo& info) const
209 {
210  return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
211 }
212 
214  const ConvertFp32ToBf16QueueDescriptor& descriptor,
215  const WorkloadInfo& info) const
216 {
217  return std::make_unique<NeonConvertFp32ToBf16Workload>(descriptor, info);
218 }
219 
221  const ConvertFp32ToFp16QueueDescriptor& descriptor,
222  const WorkloadInfo& info) const
223 {
224  return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
225 }
226 
227 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
228  const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
229 {
230  bool isFastMathEnabled = false;
231  if (m_ModelContextPtr)
232  {
233  if (m_ModelContextPtr.get() != nullptr)
234  {
235  auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
236  if (modelOptions)
237  {
238  isFastMathEnabled = modelOptions->IsFastMathEnabled();
239  }
240  }
241  }
242  return std::make_unique<NeonConvolution2dWorkload>(descriptor,
243  info,
244  m_MemoryManager->GetIntraLayerManager(),
245  isFastMathEnabled);
246 }
247 
// Debug has no Neon implementation: both helper template arguments are
// NullWorkload, so a null workload is produced regardless of data type.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}
253 
// Creates the Neon workload implementing the DepthToSpace layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
}
259 
261  const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
262 {
263  return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
264 }
265 
// Creates the Neon workload implementing the Dequantize layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
}
271 
274 {
275  return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
276 }
277 
// Creates the Neon workload implementing the Division layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
    const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonDivisionWorkload>(descriptor, info);
}
283 
285  const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
286 {
287  switch(descriptor.m_Parameters.m_Operation)
288  {
289  case UnaryOperation::Abs:
290  {
291  AbsQueueDescriptor absQueueDescriptor;
292  absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
293  absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
294 
295  return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
296  }
298  {
299  RsqrtQueueDescriptor rsqrtQueueDescriptor;
300  rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
301  rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
302 
303  return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
304  }
305  case UnaryOperation::Neg:
306  return std::make_unique<NeonNegWorkload>(descriptor, info);
307  case UnaryOperation::Exp:
308  return std::make_unique<NeonExpWorkload>(descriptor, info);
310  return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
311  default:
312  return nullptr;
313  }
314 }
315 
316 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
317  const WorkloadInfo& info) const
318 {
319  IgnoreUnused(descriptor);
320 
321  ComparisonQueueDescriptor comparisonDescriptor;
323 
324  return CreateComparison(comparisonDescriptor, info);
325 }
326 
// Creates the Neon workload implementing the Fill layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonFillWorkload>(descriptor, info);
}
332 
// Creates the Floor workload. MakeWorkloadHelper selects between
// NeonFloorFloatWorkload and NullWorkload (presumably by data type —
// confirm against the helper's definition).
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
}
338 
// Creates the Neon workload implementing the FullyConnected layer.
// The intra-layer memory manager is passed so the workload can manage
// its internal scratch memory.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
    const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}
344 
// Creates the Neon workload implementing the Gather layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
                                                             const armnn::WorkloadInfo& info) const
{
    return std::make_unique<NeonGatherWorkload>(descriptor, info);
}
350 
351 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
352  const WorkloadInfo& info) const
353 {
354  IgnoreUnused(descriptor);
355 
356  ComparisonQueueDescriptor comparisonDescriptor;
358 
359  return CreateComparison(comparisonDescriptor, info);
360 }
361 
// Input layers are realised as a generic memory copy into the backend.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}
367 
369  const InstanceNormalizationQueueDescriptor& descriptor,
370  const WorkloadInfo& info) const
371 {
372  return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
373 }
374 
376  const WorkloadInfo& info) const
377 {
378  return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
379  m_MemoryManager->GetIntraLayerManager());
380 }
381 
// Creates the Neon workload implementing the LogSoftmax layer; the
// intra-layer memory manager is supplied for internal scratch memory.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}
387 
388 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
389  const WorkloadInfo& info) const
390 {
391  switch(descriptor.m_Parameters.m_Operation)
392  {
394  return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
396  return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
397  default:
398  return nullptr;
399  }
400 }
401 
// Creates the Lstm workload. MakeWorkloadHelper selects between
// NeonLstmFloatWorkload and NullWorkload (presumably by data type).
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
}
407 
// Creates the Neon workload implementing the Maximum layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMaximumWorkload>(descriptor, info);
}
413 
// Creates the Neon workload implementing the Mean layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonMeanWorkload>(descriptor, info);
}
419 
420 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
421  const WorkloadInfo& info) const
422 {
423  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
424  {
425  throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
426  }
427 
428  return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
429 }
430 
431 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
432  const WorkloadInfo& info) const
433 {
434  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
435  {
436  throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
437  }
438 
439  return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
440 }
441 
// Merger delegates directly to the Concat implementation.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return CreateConcat(descriptor, info);
}
447 
// Creates the Neon workload implementing the Minimum layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMinimumWorkload>(descriptor, info);
}
453 
// Creates the Neon workload implementing the Multiplication layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
    const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
}
459 
// Creates the Normalization workload. MakeWorkloadHelper selects between
// NeonNormalizationFloatWorkload and NullWorkload; the intra-layer memory
// manager is supplied for internal scratch memory.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
    const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
                                                                            m_MemoryManager->GetIntraLayerManager());
}
466 
// Output layers are realised as a generic memory copy out of the backend.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}
472 
// Creates the Neon workload implementing the Pad layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<NeonPadWorkload>(descriptor, info);
}
478 
// Creates the Neon workload implementing the Permute layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonPermuteWorkload>(descriptor, info);
}
484 
// Creates the Neon workload implementing the Pooling2d layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
}
490 
// PreCompiled has no Neon implementation: both helper template arguments
// are NullWorkload, so a null workload is always produced.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}
496 
// Creates the Neon workload implementing the Prelu layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor &descriptor,
                                                                   const armnn::WorkloadInfo &info) const
{
    return std::make_unique<NeonPreluWorkload>(descriptor, info);
}
502 
// Creates the Neon workload implementing the QLstm layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonQLstmWorkload>(descriptor, info);
}
508 
// Creates the Neon workload implementing the Quantize layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
}
514 
// Creates the Neon workload implementing the QuantizedLstm layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
}
520 
// Creates the Neon workload implementing the Rank layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonRankWorkload>(descriptor, info);
}
526 
// Creates the Neon workload implementing the Reduce layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonReduceWorkload>(descriptor, info);
}
532 
// Creates the Neon workload implementing the Reshape layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonReshapeWorkload>(descriptor, info);
}
538 
// Creates the Neon workload implementing the Resize layer
// (also the delegation target of CreateResizeBilinear).
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonResizeWorkload>(descriptor, info);
}
544 
545 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
546  const ResizeBilinearQueueDescriptor& descriptor,
547  const WorkloadInfo& info) const
548 {
549  ResizeQueueDescriptor resizeDescriptor;
550  resizeDescriptor.m_Inputs = descriptor.m_Inputs;
551  resizeDescriptor.m_Outputs = descriptor.m_Outputs;
552 
553  resizeDescriptor.m_Parameters.m_DataLayout = descriptor.m_Parameters.m_DataLayout;
554  resizeDescriptor.m_Parameters.m_TargetWidth = descriptor.m_Parameters.m_TargetWidth;
555  resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;
556 
557  return CreateResize(resizeDescriptor, info);
558 }
559 
560 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor &descriptor,
561  const WorkloadInfo &info) const
562 {
563  IgnoreUnused(descriptor);
564 
565  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
566  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt);
567 
568  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
569 }
570 
// Creates the Neon workload implementing the Slice layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonSliceWorkload>(descriptor, info);
}
576 
// Creates the Neon workload implementing the Softmax layer; the
// intra-layer memory manager is supplied for internal scratch memory.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}
582 
584  const WorkloadInfo& info) const
585 {
586  return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
587 }
588 
// Creates the Neon workload implementing the SpaceToDepth layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
}
594 
// Creates the Neon workload implementing the Splitter layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonSplitterWorkload>(descriptor, info);
}
600 
// Creates the Neon workload implementing the Stack layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonStackWorkload>(descriptor, info);
}
606 
// Creates the Neon workload implementing the StridedSlice layer.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
}
612 
// Creates the Neon workload implementing the Subtraction layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
    const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
}
618 
// Creates the Neon workload implementing the Transpose layer.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonTransposeWorkload>(descriptor, info);
}
624 
626  const TransposeConvolution2dQueueDescriptor &descriptor,
627  const WorkloadInfo &info) const
628 {
629  return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
630  m_MemoryManager->GetIntraLayerManager());
631 }
632 
633 } // namespace armnn
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateCast(const CastQueueDescriptor &descriptor, const WorkloadInfo &info) const override
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:62
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout
Definition: Types.hpp:54
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ComparisonDescriptor for the ComparisonLayer.
Definition: Descriptors.hpp:78
uint32_t m_TargetWidth
Target width value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
constexpr const char * NeonBackendId()
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
NeonWorkloadFactory(const std::shared_ptr< NeonMemoryManager > &memoryManager)
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
const BackendId & GetBackendId() const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFill(const FillQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetHeight
Target height value.
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLogicalBinary(const LogicalBinaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
Definition: Descriptors.hpp:98
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::unique_ptr< IWorkload > CreateReduce(const ReduceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQLstm(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
Contains information about inputs and outputs to a layer.
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateConvertBf16ToFp32(const ConvertBf16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override