ArmNN
 21.05
ClWorkloadFactory.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "ClWorkloadFactory.hpp"
6 #include "ClBackendId.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Exceptions.hpp>
14 #include <armnn/Utils.hpp>
18 
23 
24 #include <cl/ClTensorHandle.hpp>
27 
28 #include <arm_compute/core/CL/CLKernelLibrary.h>
29 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
30 #include <arm_compute/runtime/CL/CLScheduler.h>
31 
32 #include <Filesystem.hpp>
33 #include <fstream>
34 
35 namespace armnn
36 {
37 
38 namespace
39 {
40 static const BackendId s_Id{ClBackendId()};
41 }
42 
44  Optional<DataType> dataType,
45  std::string& outReasonIfUnsupported)
46 {
47  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
48 }
49 
51  Optional<DataType> dataType,
52  std::string& outReasonIfUnsupported,
53  const ModelOptions& modelOptions)
54 {
55  return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
56 }
57 
59 {
60  return s_Id;
61 }
62 
64 {
65  if(m_ModelContextPtr)
66  {
67  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
68  if (modelOptions->SaveCachedNetwork())
69  {
70  // Save map to a filepath provided in ModelOptions
71  auto filePath = modelOptions->GetCachedNetworkFilePath();
72  if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
73  {
74  // Serialize ClContext to the file specified
76  serializer.Serialize(m_CLCompileContext);
77  std::ofstream file(filePath, std::ios::out | std::ios::binary);
78  serializer.SaveSerializedToStream(file);
79  }
80  }
81  }
82 }
83 
84 template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
85 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
86  const WorkloadInfo& info,
87  Args&&... args)
88 {
89  try
90  {
91  return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
92  }
93  catch (const cl::Error& clError)
94  {
95  throw WrapClError(clError, CHECK_LOCATION());
96  }
97 }
98 
99 template <typename Workload, typename QueueDescriptorType, typename... Args>
100 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
101  const WorkloadInfo& info,
102  Args&&... args)
103 {
104  try
105  {
106  return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
107  }
108  catch (const cl::Error& clError)
109  {
110  throw WrapClError(clError, CHECK_LOCATION());
111  }
112 }
113 
114 void ClWorkloadFactory::InitializeCLCompileContext()
115 {
116  // Initialize our m_CLCompileContext using default device and context
117  auto context = arm_compute::CLKernelLibrary::get().context();
118  auto device = arm_compute::CLKernelLibrary::get().get_device();
119  m_CLCompileContext = arm_compute::CLCompileContext(context, device);
120 
121  if (m_ModelContextPtr)
122  {
123  // Load saved programs if the user has set a filepath
124  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
125  auto filePath = modelOptions->GetCachedNetworkFilePath();
126  if (filePath != ""
127  && fs::exists(filePath)
128  && fs::is_regular_file(filePath)
129  && !(modelOptions->SaveCachedNetwork()))
130  {
131  // Deserialize binary file and load into m_CLCompileContext
132  ClContextDeserializer deserializer;
133  deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
134  }
135  }
136 }
137 
138 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
139  : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
140 {
141  InitializeCLCompileContext();
142 }
143 
144 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
146  : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
147 {
148  InitializeCLCompileContext();
149 }
150 
151 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
152  const bool IsMemoryManaged) const
153 {
154  IgnoreUnused(IsMemoryManaged);
155  std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
156  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
157 
158  return tensorHandle;
159 }
160 
161 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
162  DataLayout dataLayout,
163  const bool IsMemoryManaged) const
164 {
165  IgnoreUnused(IsMemoryManaged);
166  std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
167  tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
168 
169  return tensorHandle;
170 }
171 
172 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
173  TensorShape const& subTensorShape,
174  unsigned int const* subTensorOrigin) const
175 {
177  arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
178 
179  coords.set_num_dimensions(subTensorShape.GetNumDimensions());
180  for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
181  {
182  // Arm compute indexes tensor coords in reverse order.
183  unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
184  coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
185  }
186 
187  const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
188  if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
189  {
190  return nullptr;
191  }
192 
193  return std::make_unique<ClSubTensorHandle>(
194  PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
195 }
196 
197 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
198  const WorkloadInfo& info) const
199 {
200  IgnoreUnused(descriptor);
201 
202  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
203  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Abs);
204 
205  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
206 }
207 
208 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
209  const WorkloadInfo& info) const
210 {
211  return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
212 }
213 
214 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
215  const WorkloadInfo& info) const
216 {
217  return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
218 }
219 
220 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
221  const WorkloadInfo& info) const
222 {
223  return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
224 }
225 
227  const BatchNormalizationQueueDescriptor& descriptor,
228  const WorkloadInfo& info) const
229 {
230  return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
231 }
232 
233 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
234  const WorkloadInfo& info) const
235 {
236  return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
237 }
238 
239 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
240  const WorkloadInfo& info) const
241 {
242  return MakeWorkload<ClCastWorkload>(descriptor, info, m_CLCompileContext);
243 }
244 
245 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
246  const WorkloadInfo& info) const
247 {
248  return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
249 }
250 
251 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
252  const WorkloadInfo& info) const
253 {
254  return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
255 }
256 
257 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
258  const WorkloadInfo& info) const
259 {
260  return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
261 }
262 
264  const ConvertFp16ToFp32QueueDescriptor& descriptor,
265  const WorkloadInfo& info) const
266 {
267  return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
268 }
269 
271  const ConvertFp32ToFp16QueueDescriptor& descriptor,
272  const WorkloadInfo& info) const
273 {
274  return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
275 }
276 
277 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
278  const WorkloadInfo& info) const
279 {
280  bool isFastMathEnabled = false;
281  if (m_ModelContextPtr)
282  {
283  if (m_ModelContextPtr.get() != nullptr)
284  {
285  auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
286  if (modelOptions)
287  {
288  isFastMathEnabled = modelOptions->IsFastMathEnabled();
289  }
290  }
291  }
292  return MakeWorkload<ClConvolution2dWorkload>(descriptor,
293  info,
294  m_MemoryManager->GetIntraLayerManager(),
295  m_CLCompileContext,
296  isFastMathEnabled);
297 }
298 
299 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
300  const WorkloadInfo& info) const
301 {
302  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
303 }
304 
305 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
306  const WorkloadInfo& info) const
307 {
308  return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
309 }
310 
312  const DepthwiseConvolution2dQueueDescriptor& descriptor,
313  const WorkloadInfo& info) const
314 {
315  return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
316 }
317 
318 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
319  const WorkloadInfo& info) const
320 {
321  return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
322 }
323 
325  const DetectionPostProcessQueueDescriptor& descriptor,
326  const WorkloadInfo& info) const
327 {
328  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
329 }
330 
331 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
332  const WorkloadInfo& info) const
333 {
334  return std::make_unique<ClDivisionWorkload>(descriptor, info, m_CLCompileContext);
335 }
336 
338  const WorkloadInfo& info) const
339 {
340  switch(descriptor.m_Parameters.m_Operation)
341  {
342  case UnaryOperation::Abs:
343  {
344  AbsQueueDescriptor absQueueDescriptor;
345  absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
346  absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
347 
348  return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
349  }
350  case UnaryOperation::Exp:
351  return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
352  case UnaryOperation::Neg:
353  return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
355  {
356  RsqrtQueueDescriptor rsqrtQueueDescriptor;
357  rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
358  rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
359 
360  return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
361  }
363  return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
364  default:
365  return nullptr;
366  }
367 }
368 
369 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
370  const WorkloadInfo& info) const
371 {
372  IgnoreUnused(descriptor);
373 
374  ComparisonQueueDescriptor comparisonDescriptor;
376 
377  return CreateComparison(comparisonDescriptor, info);
378 }
379 
380 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
381  const WorkloadInfo& info) const
382 {
383  return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
384 }
385 
386 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
387  const WorkloadInfo& info) const
388 {
389  return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
390 }
391 
392 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
393  const WorkloadInfo& info) const
394 {
395  return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
396  info,
397  m_MemoryManager->GetIntraLayerManager(),
398  m_CLCompileContext);
399 }
400 
401 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
402  const WorkloadInfo& info) const
403 {
404  return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
405 }
406 
407 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
408  const WorkloadInfo& info) const
409 {
410  IgnoreUnused(descriptor);
411 
412  ComparisonQueueDescriptor comparisonDescriptor;
414 
415  return CreateComparison(comparisonDescriptor, info);
416 }
417 
418 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
419  const WorkloadInfo& info) const
420 {
421  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
422 }
423 
425  const InstanceNormalizationQueueDescriptor& descriptor,
426  const WorkloadInfo& info) const
427 {
428  return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
429 }
430 
432  const WorkloadInfo& info) const
433 {
434  return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
435 }
436 
437 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
438  const WorkloadInfo& info) const
439 {
440  switch(descriptor.m_Parameters.m_Operation)
441  {
443  return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
445  return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
446  default:
447  return nullptr;
448  }
449 }
450 
451 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
452  const WorkloadInfo& info) const
453 {
454  return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
455  info,
456  m_MemoryManager->GetIntraLayerManager(),
457  m_CLCompileContext);
458 }
459 
460 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
461  const WorkloadInfo& info) const
462 {
463  return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
464 }
465 
466 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
467  const WorkloadInfo& info) const
468 {
469  return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
470 }
471 
472 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
473  const WorkloadInfo& info) const
474 {
475  return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
476 }
477 
478 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
479  const WorkloadInfo& info) const
480 {
481  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
482  {
483  throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
484  }
485 
486  return MakeWorkload<CopyMemGenericWorkload>(descriptor, info);
487 }
488 
489 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
490  const WorkloadInfo& info) const
491 {
492  if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
493  {
494  throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
495  }
496 
497  return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
498 }
499 
500 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
501  const WorkloadInfo& info) const
502 {
503  return CreateConcat(descriptor, info);
504 }
505 
506 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
507  const WorkloadInfo& info) const
508 {
509  return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
510 }
511 
512 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
513  const WorkloadInfo& info) const
514 {
515  return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
516 }
517 
518 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
519  const WorkloadInfo& info) const
520 {
521  return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
522 }
523 
524 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
525  const WorkloadInfo& info) const
526 {
527  return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
528 }
529 
530 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
531  const WorkloadInfo& info) const
532 {
533  return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
534 }
535 
536 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
537  const WorkloadInfo& info) const
538 {
539  return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
540 }
541 
542 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
543  const WorkloadInfo& info) const
544 {
545  return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
546 }
547 
548 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
549  const WorkloadInfo& info) const
550 {
551  return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
552 }
553 
554 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor,
555  const WorkloadInfo &info) const
556 {
557  return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
558 }
559 
560 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
561  const WorkloadInfo& info) const
562 {
563  return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
564 }
565 
566 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
567  const WorkloadInfo& info) const
568 {
569  return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
570 }
571 
572 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
573  const WorkloadInfo& info) const
574 {
575  return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
576 }
577 
578 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
579  const WorkloadInfo& info) const
580 {
581  return std::make_unique<ClRankWorkload>(descriptor, info);
582 }
583 
584 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
585  const WorkloadInfo& info) const
586 {
587  return std::make_unique<ClReduceWorkload>(descriptor, info);
588 }
589 
590 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
591  const WorkloadInfo& info) const
592 {
593  return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
594 }
595 
596 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
597  const WorkloadInfo& info) const
598 {
599  return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
600 }
601 
602 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
603  const WorkloadInfo& info) const
604 {
605  ResizeQueueDescriptor resizeDescriptor;
606  resizeDescriptor.m_Inputs = descriptor.m_Inputs;
607  resizeDescriptor.m_Outputs = descriptor.m_Outputs;
608 
609  resizeDescriptor.m_Parameters.m_Method = ResizeMethod::Bilinear;
610  resizeDescriptor.m_Parameters.m_DataLayout = descriptor.m_Parameters.m_DataLayout;
611  resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;
612  resizeDescriptor.m_Parameters.m_TargetWidth = descriptor.m_Parameters.m_TargetWidth;
613 
614  return CreateResize(resizeDescriptor, info);
615 }
616 
617 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
618  const WorkloadInfo& info) const
619 {
620  IgnoreUnused(descriptor);
621 
622  ElementwiseUnaryQueueDescriptor elementwiseUnaryDescriptor;
623  elementwiseUnaryDescriptor.m_Parameters = ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt);
624 
625  return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
626 }
627 
628 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
629  const WorkloadInfo& info) const
630 {
631  return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
632 }
633 
634 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
635  const WorkloadInfo& info) const
636 {
637  return std::make_unique<ClSoftmaxWorkload>(descriptor,
638  info,
639  m_MemoryManager->GetIntraLayerManager(),
640  m_CLCompileContext);
641 }
642 
643 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
644  const WorkloadInfo& info) const
645 {
646  return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
647 }
648 
649 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
650  const WorkloadInfo& info) const
651 {
652  return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
653 }
654 
655 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
656  const WorkloadInfo& info) const
657 {
658  return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
659 }
660 
661 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
662  const WorkloadInfo& info) const
663 {
664  return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
665 }
666 
667 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
668  const WorkloadInfo& info) const
669 {
670  return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
671 }
672 
673 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
674  const WorkloadInfo& info) const
675 {
676  return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
677 }
678 
679 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
680  const WorkloadInfo& info) const
681 {
682  return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
683 }
684 
686  const TransposeConvolution2dQueueDescriptor& descriptor,
687  const WorkloadInfo& info) const
688 {
689  return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
690  info,
691  m_MemoryManager->GetIntraLayerManager(),
692  m_CLCompileContext);
693 }
694 
695 } // namespace armnn
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ClWorkloadFactory(const std::shared_ptr< ClMemoryManager > &memoryManager)
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:62
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout
Definition: Types.hpp:54
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void AfterWorkloadsCreated() override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ComparisonDescriptor for the ComparisonLayer.
Definition: Descriptors.hpp:78
uint32_t m_TargetWidth
Target width value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ResizeMethod m_Method
The Interpolation method to use (Bilinear, NearestNeighbor).
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &descriptor, const WorkloadInfo &info) const override
const BackendId & GetBackendId() const override
bool SaveSerializedToStream(std::ostream &stream)
Serializes the ClContext to the stream.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 > FloatWorkload
Definition: Workload.hpp:170
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
std::unique_ptr< IWorkload > CreateLogicalBinary(const LogicalBinaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateReduce(const ReduceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFill(const FillQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateCast(const CastQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateQLstm(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void Serialize(const arm_compute::CLCompileContext &clCompileContext)
Serializes the CLCompileContext built-in programs.
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
#define CHECK_LOCATION()
Definition: Exceptions.hpp:197
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
uint32_t m_TargetHeight
Target height value.
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
void Deserialize(arm_compute::CLCompileContext &clCompileContext, cl::Context &context, cl::Device &device, const std::string &filePath)
Deserializes the CLCompileContext built-in programs from a binary file.
TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 > Uint8Workload
Definition: Workload.hpp:176
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
Definition: Descriptors.hpp:98
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
Contains information about inputs and outputs to a layer.
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::string GetCachedNetworkFilePath() const
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override