NeonWorkloadFactory.cpp (ArmNN 21.11)
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonTensorHandle.hpp"
#include "NeonWorkloadFactory.hpp"

#include <Layer.hpp>

#include <armnn/Utils.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemImportWorkload.hpp>
#include <backendsCommon/TensorHandle.hpp>

#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>

namespace armnn
{

namespace
{
static const BackendId s_Id{NeonBackendId()};
}

bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}

bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported,
                                           const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}

const BackendId& NeonWorkloadFactory::GetBackendId() const
{
    return s_Id;
}

void NeonWorkloadFactory::SetNumberOfThreads()
{
    if (m_ModelContextPtr)
    {
        const unsigned int MIN_THREADS = 1;
        const unsigned int MAX_THREADS = 64;

        // Set the number of threads to be used if the user has set NumberOfThreads param
        // Only set if within limit or valid input
        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
        auto numberOfThreads = modelOptions->GetNumberOfThreads();

        if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
        {
            arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
        }
    }
}
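
// Illustrative note (not part of the original source): a minimal, hedged sketch of how a
// caller might supply the "NumberOfThreads" option that SetNumberOfThreads() reads through
// NeonBackendModelContext. The OptimizerOptions plumbing shown here is an assumption about
// typical usage, not code from this file.
//
//     armnn::BackendOptions cpuAccOptions("CpuAcc", {{"NumberOfThreads", 4u}});
//     armnn::OptimizerOptions optimizerOptions;
//     optimizerOptions.m_ModelOptions.push_back(cpuAccOptions);
//     // optimizerOptions is then passed to armnn::Optimize(...), which forwards the
//     // resulting ModelOptions to the backend and, ultimately, to this factory.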

NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    SetNumberOfThreads();
}

NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
                                         const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
    SetNumberOfThreads();
}

std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
                                                                          TensorShape const& subTensorShape,
                                                                          unsigned int const* subTensorOrigin) const
{
    const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);

    arm_compute::Coordinates coords;
    coords.set_num_dimensions(subTensorShape.GetNumDimensions());
    for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
    {
        // Arm compute indexes tensor coords in reverse order.
        unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
        coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
    }

    const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
    if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
    {
        return nullptr;
    }

    return std::make_unique<NeonSubTensorHandle>(
        PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
}
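
// Illustrative note (not part of the original source): for a 4-D sub-tensor origin given in
// ArmNN order as {d0, d1, d2, d3}, the loop above produces the arm_compute coordinates
// (d3, d2, d1, d0), since ACL indexes dimensions from fastest- to slowest-iterating.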

std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       const bool IsMemoryManaged) const
{
    auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
    if (IsMemoryManaged)
    {
        tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
    }
    return tensorHandle;
}

std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       DataLayout dataLayout,
                                                                       const bool IsMemoryManaged) const
{
    auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
    if (IsMemoryManaged)
    {
        tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
    }
    return tensorHandle;
}
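
// Illustrative note (not part of the original source): a minimal sketch of creating a
// memory-managed tensor handle through this factory. The `factory` variable and the
// TensorInfo values are assumptions for illustration only.
//
//     armnn::TensorInfo info({1, 16, 16, 3}, armnn::DataType::Float32);
//     std::unique_ptr<armnn::ITensorHandle> handle = factory.CreateTensorHandle(info);
//     handle->Manage();    // register the tensor with the inter-layer memory group
//     handle->Allocate();  // commit memory before use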

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonActivationWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonAdditionWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonCastWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonChannelShuffleWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonComparisonWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConcatWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonConstantWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertBf16ToFp32(
    const ConvertBf16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertBf16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
    const ConvertFp16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToBf16(
    const ConvertFp32ToBf16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp32ToBf16Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
    const ConvertFp32ToFp16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    bool isFastMathEnabled = false;
    if (m_ModelContextPtr)
    {
        if (m_ModelContextPtr.get() != nullptr)
        {
            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
            if (modelOptions)
            {
                isFastMathEnabled = modelOptions->IsFastMathEnabled();
            }
        }
    }
    return std::make_unique<NeonConvolution2dWorkload>(descriptor,
                                                       info,
                                                       m_MemoryManager->GetIntraLayerManager(),
                                                       isFastMathEnabled);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution3d(
    const Convolution3dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    bool isFastMathEnabled = false;
    if (m_ModelContextPtr)
    {
        if (m_ModelContextPtr.get() != nullptr)
        {
            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
            if (modelOptions)
            {
                isFastMathEnabled = modelOptions->IsFastMathEnabled();
            }
        }
    }
    return std::make_unique<NeonConvolution3dWorkload>(descriptor,
                                                       info,
                                                       m_MemoryManager->GetIntraLayerManager(),
                                                       isFastMathEnabled);
}
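
// Illustrative note (not part of the original source): the isFastMathEnabled flag above is
// driven by the "FastMathEnabled" backend option parsed by NeonBackendModelContext. A hedged
// sketch of enabling it (the surrounding OptimizerOptions setup is assumed):
//
//     armnn::BackendOptions cpuAccOptions("CpuAcc", {{"FastMathEnabled", true}});
//     optimizerOptions.m_ModelOptions.push_back(cpuAccOptions);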

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
    const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDetectionPostProcess(
    const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
    const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonDivisionWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateElementwiseUnary(
    const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case UnaryOperation::Abs:
        {
            AbsQueueDescriptor absQueueDescriptor;
            absQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            absQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
        }
        case UnaryOperation::Exp:
            return std::make_unique<NeonExpWorkload>(descriptor, info);
        case UnaryOperation::LogicalNot:
            return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
        case UnaryOperation::Log:
            return std::make_unique<NeonLogWorkload>(descriptor, info);
        case UnaryOperation::Neg:
            return std::make_unique<NeonNegWorkload>(descriptor, info);
        case UnaryOperation::Rsqrt:
        {
            RsqrtQueueDescriptor rsqrtQueueDescriptor;
            rsqrtQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
        }
        case UnaryOperation::Sin:
            return std::make_unique<NeonSinWorkload>(descriptor, info);
        default:
            return nullptr;
    }
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonFillWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
    const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
                                                             const armnn::WorkloadInfo& info) const
{
    return std::make_unique<NeonGatherWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInstanceNormalization(
    const InstanceNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
                                                                              m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case LogicalBinaryOperation::LogicalAnd:
            return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
        case LogicalBinaryOperation::LogicalOr:
            return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
        default:
            return nullptr;
    }
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMaximumWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonMeanWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
    }

    return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
    }

    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonMinimumWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
    const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
    const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
                                                                            m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<NeonPadWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonPermuteWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor& descriptor,
                                                                   const armnn::WorkloadInfo& info) const
{
    return std::make_unique<NeonPreluWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonQLstmWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<NeonRankWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonReduceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonReshapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<NeonResizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonSliceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<NeonSplitterWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<NeonStackWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
    const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
}

std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
                                                                       const WorkloadInfo& info) const
{
    return std::make_unique<NeonTransposeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateTransposeConvolution2d(
    const TransposeConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
                                                                m_MemoryManager->GetIntraLayerManager());
}

} // namespace armnn
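
// Illustrative note (not part of the original source): in practice this factory is normally
// obtained from the Neon backend rather than constructed directly. A hedged sketch, assuming
// the NeonBackend header is available to the caller:
//
//     armnn::NeonBackend backend;
//     auto memoryManager = backend.CreateMemoryManager();
//     auto factory       = backend.CreateWorkloadFactory(std::move(memoryManager));
//     // factory can now create tensor handles and workloads, e.g. factory->CreateActivation(...).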