NeonWorkloadFactory.cpp (ArmNN 21.08)
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonTensorHandle.hpp"
#include "NeonWorkloadFactory.hpp"

#include <Layer.hpp>

#include <armnn/Utils.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemImportWorkload.hpp>
#include <backendsCommon/TensorHandle.hpp>

#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>

namespace armnn
{

namespace
{
static const BackendId s_Id{NeonBackendId()};
}

bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}

bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported,
                                           const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}

const BackendId& NeonWorkloadFactory::GetBackendId() const
{
    return s_Id;
}

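// Reads the "NumberOfThreads" backend option (when a model context was supplied) and, if the
// value falls inside the supported range, applies it to the Arm Compute Library scheduler.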
void NeonWorkloadFactory::SetNumberOfThreads()
{
    if (m_ModelContextPtr)
    {
        const unsigned int MIN_THREADS = 1;
        const unsigned int MAX_THREADS = 64;

        // Apply the NumberOfThreads parameter only if the user has set it and it is within the
        // supported range.
        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
        if (modelOptions)
        {
            auto numberOfThreads = modelOptions->GetNumberOfThreads();

            if (numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
            {
                arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
            }
        }
    }
}

NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    SetNumberOfThreads();
}

NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
                                         const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
    SetNumberOfThreads();
}

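// Creates a handle that aliases a region of an existing tensor rather than allocating new memory;
// returns nullptr when the requested shape and origin do not form a valid sub-tensor of the parent.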
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
                                                                          TensorShape const& subTensorShape,
                                                                          unsigned int const* subTensorOrigin) const
{
    const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);

    arm_compute::Coordinates coords;
    coords.set_num_dimensions(subTensorShape.GetNumDimensions());
    for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
    {
        // Arm Compute indexes tensor coordinates in reverse order, so flip the index.
        unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
        coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
    }

    const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
    if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
    {
        return nullptr;
    }

    return std::make_unique<NeonSubTensorHandle>(
        PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
}

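// Memory-managed handles are registered with the factory's inter-layer memory group so that their
// backing memory can be pooled and reused by the memory manager between layers.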
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       const bool IsMemoryManaged) const
{
    auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
    if (IsMemoryManaged)
    {
        tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
    }
    return tensorHandle;
}

std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       DataLayout dataLayout,
                                                                       const bool IsMemoryManaged) const
{
    auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
    if (IsMemoryManaged)
    {
        tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
    }
    return tensorHandle;
}

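// The standalone Abs entry point is kept for the deprecated Abs layer; it re-expresses the request
// as an ElementwiseUnary workload with UnaryOperation::Abs.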
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    IgnoreUnused(descriptor);

    ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
    elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs);

    return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonActivationWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonAdditionWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonCastWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonComparisonWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConcatWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonConstantWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertBf16ToFp32(
    const ConvertBf16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertBf16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
    const ConvertFp16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToBf16(
    const ConvertFp32ToBf16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp32ToBf16Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
    const ConvertFp32ToFp16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
}

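// The "FastMathEnabled" backend model option is forwarded to the convolution workload, where it
// allows Arm Compute Library to pick faster kernels that may trade some numerical precision.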
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    bool isFastMathEnabled = false;
    if (m_ModelContextPtr)
    {
        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
        if (modelOptions)
        {
            isFastMathEnabled = modelOptions->IsFastMathEnabled();
        }
    }
    return std::make_unique<NeonConvolution2dWorkload>(descriptor,
                                                       info,
                                                       m_MemoryManager->GetIntraLayerManager(),
                                                       isFastMathEnabled);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
    const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDetectionPostProcess(
    const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
    const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonDivisionWorkload>(descriptor, info);
}

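// Dispatches on the requested unary operation. Abs and Rsqrt have dedicated Neon workloads with
// their own descriptor types, so the inputs and outputs are repackaged before construction;
// unsupported operations yield nullptr.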
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateElementwiseUnary(
    const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case UnaryOperation::Abs:
        {
            AbsQueueDescriptor absQueueDescriptor;
            absQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            absQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
        }
        case UnaryOperation::Exp:
            return std::make_unique<NeonExpWorkload>(descriptor, info);
        case UnaryOperation::LogicalNot:
            return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
        case UnaryOperation::Log:
            return std::make_unique<NeonLogWorkload>(descriptor, info);
        case UnaryOperation::Neg:
            return std::make_unique<NeonNegWorkload>(descriptor, info);
        case UnaryOperation::Rsqrt:
        {
            RsqrtQueueDescriptor rsqrtQueueDescriptor;
            rsqrtQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
        }
        case UnaryOperation::Sin:
            return std::make_unique<NeonSinWorkload>(descriptor, info);
        default:
            return nullptr;
    }
}

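// The deprecated Equal and Greater layers are serviced by the generic Comparison workload; only
// the comparison operation is set here before delegating to CreateComparison.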
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    IgnoreUnused(descriptor);

    ComparisonQueueDescriptor comparisonDescriptor;
    comparisonDescriptor.m_Parameters = ComparisonDescriptor(ComparisonOperation::Equal);

    return CreateComparison(comparisonDescriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonFillWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
    const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
                                                             const armnn::WorkloadInfo& info) const
{
    return std::make_unique<NeonGatherWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    IgnoreUnused(descriptor);

    ComparisonQueueDescriptor comparisonDescriptor;
    comparisonDescriptor.m_Parameters = ComparisonDescriptor(ComparisonOperation::Greater);

    return CreateComparison(comparisonDescriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInstanceNormalization(
    const InstanceNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
                                                                              m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case LogicalBinaryOperation::LogicalAnd:
            return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
        case LogicalBinaryOperation::LogicalOr:
            return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
        default:
            return nullptr;
    }
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMaximumWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonMeanWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
    }

    return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
    }

    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return CreateConcat(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMinimumWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
    const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
    const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
                                                                            m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<NeonPadWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonPermuteWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor& descriptor,
                                                                   const armnn::WorkloadInfo& info) const
{
    return std::make_unique<NeonPreluWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonQLstmWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonRankWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonReduceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonReshapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonResizeWorkload>(descriptor, info);
}

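// The deprecated ResizeBilinear entry point is serviced by the generic Resize workload; the
// inputs, outputs, data layout and target dimensions are copied across before delegating.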
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
    const ResizeBilinearQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    ResizeQueueDescriptor resizeDescriptor;
    resizeDescriptor.m_Inputs  = descriptor.m_Inputs;
    resizeDescriptor.m_Outputs = descriptor.m_Outputs;

    resizeDescriptor.m_Parameters.m_DataLayout   = descriptor.m_Parameters.m_DataLayout;
    resizeDescriptor.m_Parameters.m_TargetWidth  = descriptor.m_Parameters.m_TargetWidth;
    resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;

    return CreateResize(resizeDescriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    IgnoreUnused(descriptor);

    ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
    elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt);

    return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonSliceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonSplitterWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonStackWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
    const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonTransposeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateTransposeConvolution2d(
    const TransposeConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
                                                                m_MemoryManager->GetIntraLayerManager());
}

} // namespace armnn
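
// Illustrative usage sketch (not part of the original file). It assumes a default-constructible
// NeonMemoryManager and uses a ReLU activation purely for demonstration:
//
//     auto memoryManager = std::make_shared<NeonMemoryManager>();
//     NeonWorkloadFactory factory(memoryManager);
//
//     TensorInfo tensorInfo({1, 16}, DataType::Float32);
//     std::unique_ptr<ITensorHandle> input  = factory.CreateTensorHandle(tensorInfo);
//     std::unique_ptr<ITensorHandle> output = factory.CreateTensorHandle(tensorInfo);
//
//     ActivationQueueDescriptor descriptor;
//     descriptor.m_Parameters.m_Function = ActivationFunction::ReLu;
//     descriptor.m_Inputs.push_back(input.get());
//     descriptor.m_Outputs.push_back(output.get());
//
//     WorkloadInfo workloadInfo;
//     workloadInfo.m_InputTensorInfos  = {tensorInfo};
//     workloadInfo.m_OutputTensorInfos = {tensorInfo};
//
//     std::unique_ptr<IWorkload> workload = factory.CreateActivation(descriptor, workloadInfo);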