ArmNN
 21.08
ClWorkloadFactory.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "ClWorkloadFactory.hpp"
6 #include "ClBackendId.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Exceptions.hpp>
14 #include <armnn/Utils.hpp>
18 
23 
24 #include <cl/ClTensorHandle.hpp>
27 
28 #include <arm_compute/core/CL/CLKernelLibrary.h>
29 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
30 #include <arm_compute/runtime/CL/CLScheduler.h>
31 
33 #include <fstream>
34 
35 namespace armnn
36 {
37 
38 namespace
39 {
40 static const BackendId s_Id{ClBackendId()};
41 }
42 
44  Optional<DataType> dataType,
45  std::string& outReasonIfUnsupported)
46 {
47  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
48 }
49 
51  Optional<DataType> dataType,
52  std::string& outReasonIfUnsupported,
53  const ModelOptions& modelOptions)
54 {
55  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
56 }
57 
59 {
60  return s_Id;
61 }
62 
64 {
65  if(m_ModelContextPtr)
66  {
67  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
68  if (modelOptions->SaveCachedNetwork())
69  {
70  // Save map to a filepath provided in ModelOptions
71  auto filePath = modelOptions->GetCachedNetworkFilePath();
72  if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
73  {
74  // Serialize ClContext to the file specified
76  serializer.Serialize(m_CLCompileContext);
77  std::ofstream file(filePath, std::ios::out | std::ios::binary);
78  serializer.SaveSerializedToStream(file);
79  }
80  }
81  }
82 }
83 
84 template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
85 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
86  const WorkloadInfo& info,
87  Args&&... args)
88 {
89  try
90  {
91  return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
92  }
93  catch (const cl::Error& clError)
94  {
95  throw WrapClError(clError, CHECK_LOCATION());
96  }
97 }
98 
99 template <typename Workload, typename QueueDescriptorType, typename... Args>
100 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
101  const WorkloadInfo& info,
102  Args&&... args)
103 {
104  try
105  {
106  return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
107  }
108  catch (const cl::Error& clError)
109  {
110  throw WrapClError(clError, CHECK_LOCATION());
111  }
112 }
113 
114 void ClWorkloadFactory::InitializeCLCompileContext()
115 {
116  // Initialize our m_CLCompileContext using default device and context
117  auto context = arm_compute::CLKernelLibrary::get().context();
118  auto device = arm_compute::CLKernelLibrary::get().get_device();
119  m_CLCompileContext = arm_compute::CLCompileContext(context, device);
120 
121  if (m_ModelContextPtr)
122  {
123  // Load saved programs if the user has set a filepath
124  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
125  auto filePath = modelOptions->GetCachedNetworkFilePath();
126  if (filePath != ""
127  && fs::exists(filePath)
128  && fs::is_regular_file(filePath)
129  && !(modelOptions->SaveCachedNetwork()))
130  {
131  // Deserialize binary file and load into m_CLCompileContext
132  ClContextDeserializer deserializer;
133  deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
134  }
135  }
136 }
137 
138 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
139  : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
140 {
141  InitializeCLCompileContext();
142 }
143 
144 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
146  : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
147 {
148  InitializeCLCompileContext();
149 }
150 
151 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
152  const bool IsMemoryManaged) const
153 {
154  IgnoreUnused(IsMemoryManaged);
155  std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
156  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
157 
158  return tensorHandle;
159 }
160 
161 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
162  DataLayout dataLayout,
163  const bool IsMemoryManaged) const
164 {
165  IgnoreUnused(IsMemoryManaged);
166  std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
167  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
168 
169  return tensorHandle;
170 }
171 
172 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
173  TensorShape const& subTensorShape,
174  unsigned int const* subTensorOrigin) const
175 {
177  arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
178 
179  coords.set_num_dimensions(subTensorShape.GetNumDimensions());
180  for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
181  {
182  // Arm compute indexes tensor coords in reverse order.
183  unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
184  coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
185  }
186 
187  const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
188  if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
189  {
190  return nullptr;
191  }
192 
193  return std::make_unique<ClSubTensorHandle>(
194  PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
195 }
196 
197 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
198  const WorkloadInfo& info) const
199 {
200  IgnoreUnused(descriptor);
201 
202  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
203  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs);
204 
205  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
206 }
207 
208 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
209  const WorkloadInfo& info) const
210 {
211  return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
212 }
213 
214 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
215  const WorkloadInfo& info) const
216 {
217  return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
218 }
219 
220 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
221  const WorkloadInfo& info) const
222 {
223  return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
224 }
225 
227  const BatchNormalizationQueueDescriptor& descriptor,
228  const WorkloadInfo& info) const
229 {
230  return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
231 }
232 
233 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
234  const WorkloadInfo& info) const
235 {
236  return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
237 }
238 
239 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
240  const WorkloadInfo& info) const
241 {
242  return MakeWorkload<ClCastWorkload>(descriptor, info, m_CLCompileContext);
243 }
244 
245 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
246  const WorkloadInfo& info) const
247 {
248  return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
249 }
250 
251 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
252  const WorkloadInfo& info) const
253 {
254  return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
255 }
256 
257 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
258  const WorkloadInfo& info) const
259 {
260  return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
261 }
262 
264  const ConvertFp16ToFp32QueueDescriptor& descriptor,
265  const WorkloadInfo& info) const
266 {
267  return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
268 }
269 
271  const ConvertFp32ToFp16QueueDescriptor& descriptor,
272  const WorkloadInfo& info) const
273 {
274  return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
275 }
276 
277 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
278  const WorkloadInfo& info) const
279 {
280  bool isFastMathEnabled = false;
281  if (m_ModelContextPtr)
282  {
283  if (m_ModelContextPtr.get() != nullptr)
284  {
285  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
286  if (modelOptions)
287  {
288  isFastMathEnabled = modelOptions->IsFastMathEnabled();
289  }
290  }
291  }
292  return MakeWorkload<ClConvolution2dWorkload>(descriptor,
293  info,
294  m_MemoryManager->GetIntraLayerManager(),
295  m_CLCompileContext,
296  isFastMathEnabled);
297 }
298 
299 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
300  const WorkloadInfo& info) const
301 {
302  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
303 }
304 
305 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
306  const WorkloadInfo& info) const
307 {
308  return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
309 }
310 
312  const DepthwiseConvolution2dQueueDescriptor& descriptor,
313  const WorkloadInfo& info) const
314 {
315  return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
316 }
317 
318 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
319  const WorkloadInfo& info) const
320 {
321  return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
322 }
323 
325  const DetectionPostProcessQueueDescriptor& descriptor,
326  const WorkloadInfo& info) const
327 {
328  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
329 }
330 
331 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
332  const WorkloadInfo& info) const
333 {
334  return std::make_unique<ClDivisionWorkload>(descriptor, info, m_CLCompileContext);
335 }
336 
338  const WorkloadInfo& info) const
339 {
340  switch(descriptor.m_Parameters.m_Operation)
341  {
342  case UnaryOperation::Abs:
343  {
344  AbsQueueDescriptor absQueueDescriptor;
345  absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
346  absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
347 
348  return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
349  }
350  case UnaryOperation::Exp:
351  return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
352  case UnaryOperation::Log:
353  return std::make_unique<ClLogWorkload>(descriptor, info, m_CLCompileContext);
355  return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
356  case UnaryOperation::Neg:
357  return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
359  {
360  RsqrtQueueDescriptor rsqrtQueueDescriptor;
361  rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
362  rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
363 
364  return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
365  }
366  case UnaryOperation::Sin:
367  return std::make_unique<ClSinWorkload>(descriptor, info, m_CLCompileContext);
368  default:
369  return nullptr;
370  }
371 }
372 
373 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
374  const WorkloadInfo& info) const
375 {
376  IgnoreUnused(descriptor);
377 
378  ComparisonQueueDescriptor comparisonDescriptor;
380 
381  return CreateComparison(comparisonDescriptor, info);
382 }
383 
384 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
385  const WorkloadInfo& info) const
386 {
387  return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
388 }
389 
390 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
391  const WorkloadInfo& info) const
392 {
393  return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
394 }
395 
396 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
397  const WorkloadInfo& info) const
398 {
399  return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
400  info,
401  m_MemoryManager->GetIntraLayerManager(),
402  m_CLCompileContext);
403 }
404 
405 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
406  const WorkloadInfo& info) const
407 {
408  return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
409 }
410 
411 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
412  const WorkloadInfo& info) const
413 {
414  IgnoreUnused(descriptor);
415 
416  ComparisonQueueDescriptor comparisonDescriptor;
418 
419  return CreateComparison(comparisonDescriptor, info);
420 }
421 
422 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
423  const WorkloadInfo& info) const
424 {
425  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
426 }
427 
429  const InstanceNormalizationQueueDescriptor& descriptor,
430  const WorkloadInfo& info) const
431 {
432  return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
433 }
434 
436  const WorkloadInfo& info) const
437 {
438  return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
439 }
440 
441 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
442  const WorkloadInfo& info) const
443 {
444  switch(descriptor.m_Parameters.m_Operation)
445  {
447  return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
449  return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
450  default:
451  return nullptr;
452  }
453 }
454 
455 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
456  const WorkloadInfo& info) const
457 {
458  return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
459  info,
460  m_MemoryManager->GetIntraLayerManager(),
461  m_CLCompileContext);
462 }
463 
464 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
465  const WorkloadInfo& info) const
466 {
467  return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
468 }
469 
470 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
471  const WorkloadInfo& info) const
472 {
473  return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
474 }
475 
476 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
477  const WorkloadInfo& info) const
478 {
479  return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
480 }
481 
482 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
483  const WorkloadInfo& info) const
484 {
485  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
486  {
487  throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
488  }
489 
490  return MakeWorkload<CopyMemGenericWorkload>(descriptor, info);
491 }
492 
493 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
494  const WorkloadInfo& info) const
495 {
496  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
497  {
498  throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
499  }
500 
501  return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
502 }
503 
504 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
505  const WorkloadInfo& info) const
506 {
507  return CreateConcat(descriptor, info);
508 }
509 
510 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
511  const WorkloadInfo& info) const
512 {
513  return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
514 }
515 
516 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
517  const WorkloadInfo& info) const
518 {
519  return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
520 }
521 
522 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
523  const WorkloadInfo& info) const
524 {
525  return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
526 }
527 
528 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
529  const WorkloadInfo& info) const
530 {
531  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
532 }
533 
534 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
535  const WorkloadInfo& info) const
536 {
537  return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
538 }
539 
540 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
541  const WorkloadInfo& info) const
542 {
543  return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
544 }
545 
546 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
547  const WorkloadInfo& info) const
548 {
549  return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
550 }
551 
552 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
553  const WorkloadInfo& info) const
554 {
555  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
556 }
557 
558 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor,
559  const WorkloadInfo &info) const
560 {
561  return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
562 }
563 
564 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
565  const WorkloadInfo& info) const
566 {
567  return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
568 }
569 
570 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
571  const WorkloadInfo& info) const
572 {
573  return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
574 }
575 
576 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
577  const WorkloadInfo& info) const
578 {
579  return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
580 }
581 
582 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
583  const WorkloadInfo& info) const
584 {
585  return std::make_unique<ClRankWorkload>(descriptor, info);
586 }
587 
588 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
589  const WorkloadInfo& info) const
590 {
591  return std::make_unique<ClReduceWorkload>(descriptor, info);
592 }
593 
594 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
595  const WorkloadInfo& info) const
596 {
597  return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
598 }
599 
600 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
601  const WorkloadInfo& info) const
602 {
603  return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
604 }
605 
606 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
607  const WorkloadInfo& info) const
608 {
609  ResizeQueueDescriptor resizeDescriptor;
610  resizeDescriptor.m_Inputs = descriptor.m_Inputs;
611  resizeDescriptor.m_Outputs = descriptor.m_Outputs;
612 
613  resizeDescriptor.m_Parameters.m_Method = ResizeMethod::Bilinear;
614  resizeDescriptor.m_Parameters.m_DataLayout = descriptor.m_Parameters.m_DataLayout;
615  resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;
616  resizeDescriptor.m_Parameters.m_TargetWidth = descriptor.m_Parameters.m_TargetWidth;
617 
618  return CreateResize(resizeDescriptor, info);
619 }
620 
621 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
622  const WorkloadInfo& info) const
623 {
624  IgnoreUnused(descriptor);
625 
626  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
627  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt);
628 
629  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
630 }
631 
632 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
633  const WorkloadInfo& info) const
634 {
635  return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
636 }
637 
638 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
639  const WorkloadInfo& info) const
640 {
641  return std::make_unique<ClSoftmaxWorkload>(descriptor,
642  info,
643  m_MemoryManager->GetIntraLayerManager(),
644  m_CLCompileContext);
645 }
646 
647 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
648  const WorkloadInfo& info) const
649 {
650  return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
651 }
652 
653 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
654  const WorkloadInfo& info) const
655 {
656  return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
657 }
658 
659 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
660  const WorkloadInfo& info) const
661 {
662  return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
663 }
664 
665 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
666  const WorkloadInfo& info) const
667 {
668  return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
669 }
670 
671 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
672  const WorkloadInfo& info) const
673 {
674  return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
675 }
676 
677 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
678  const WorkloadInfo& info) const
679 {
680  return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
681 }
682 
683 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
684  const WorkloadInfo& info) const
685 {
686  return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
687 }
688 
690  const TransposeConvolution2dQueueDescriptor& descriptor,
691  const WorkloadInfo& info) const
692 {
693  return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
694  info,
695  m_MemoryManager->GetIntraLayerManager(),
696  m_CLCompileContext);
697 }
698 
699 } // namespace armnn
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ClWorkloadFactory(const std::shared_ptr< ClMemoryManager > &memoryManager)
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:61
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout
Definition: Types.hpp:53
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void AfterWorkloadsCreated() override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ComparisonDescriptor for the ComparisonLayer.
Definition: Descriptors.hpp:78
uint32_t m_TargetWidth
Target width value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ResizeMethod m_Method
The Interpolation method to use (Bilinear, NearestNeighbor).
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &descriptor, const WorkloadInfo &info) const override
const BackendId & GetBackendId() const override
bool SaveSerializedToStream(std::ostream &stream)
Serializes the ClContext to the stream.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 > FloatWorkload
Definition: Workload.hpp:170
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
std::unique_ptr< IWorkload > CreateLogicalBinary(const LogicalBinaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateReduce(const ReduceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFill(const FillQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateCast(const CastQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateQLstm(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void Serialize(const arm_compute::CLCompileContext &clCompileContext)
Serializes the CLCompileContext built-in programs.
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
#define CHECK_LOCATION()
Definition: Exceptions.hpp:197
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
uint32_t m_TargetHeight
Target height value.
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void Deserialize(arm_compute::CLCompileContext &clCompileContext, cl::Context &context, cl::Device &device, const std::string &filePath)
Deserializes the CLCompileContext built-in programs from a binary file.
TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 > Uint8Workload
Definition: Workload.hpp:176
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
Definition: Descriptors.hpp:98
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
Contains information about TensorInfos of a layer.
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::string GetCachedNetworkFilePath() const
Depthwise Convolution 2D layer workload data.
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override