22 #include <arm_compute/core/CL/CLKernelLibrary.h> 23 #include <arm_compute/runtime/CL/CLBufferAllocator.h> 24 #include <arm_compute/runtime/CL/CLScheduler.h> 26 #include <boost/core/ignore_unused.hpp> 27 #include <boost/polymorphic_cast.hpp> 28 #include <boost/format.hpp> 40 std::string& outReasonIfUnsupported)
51 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
57 return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor,
info, std::forward<Args>(args)...);
65 template <
typename Workload,
typename QueueDescriptorType,
typename... Args>
66 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
72 return std::make_unique<Workload>(descriptor,
info, std::forward<Args>(args)...);
81 : m_MemoryManager(memoryManager)
86 const bool IsMemoryManaged)
const 88 boost::ignore_unused(IsMemoryManaged);
89 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
90 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
97 const bool IsMemoryManaged)
const 99 boost::ignore_unused(IsMemoryManaged);
100 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
101 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
108 unsigned int const* subTensorOrigin)
const 111 arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
118 coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
121 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.
GetShape());
122 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
127 return std::make_unique<ClSubTensorHandle>(
128 boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords);
134 boost::ignore_unused(descriptor);
145 return MakeWorkload<ClActivationWorkload>(descriptor,
info);
151 return MakeWorkload<ClAdditionWorkload>(descriptor,
info);
157 return std::make_unique<ClArgMinMaxWorkload>(descriptor,
info);
164 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor,
info);
170 return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor,
info);
182 return MakeWorkload<ClGreaterFloat32Workload, ClGreaterUint8Workload>(greaterQueueDescriptor,
info);
184 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
190 return MakeWorkload<ClConcatWorkload>(descriptor,
info);
196 return MakeWorkload<ClConstantWorkload>(descriptor,
info);
203 return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor,
info);
210 return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor,
info);
216 return MakeWorkload<ClConvolution2dWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
222 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
228 return MakeWorkload<ClDepthToSpaceWorkload>(descriptor,
info);
235 return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor,
info);
241 return MakeWorkload<ClDequantizeWorkload>(descriptor,
info);
248 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
254 return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor,
info);
266 return MakeWorkload<ClAbsWorkload>(absQueueDescriptor,
info);
274 return MakeWorkload<ClRsqrtWorkload>(rsqrtQueueDescriptor,
info);
276 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
282 boost::ignore_unused(descriptor);
293 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor,
info);
299 return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
305 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
311 boost::ignore_unused(descriptor);
322 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
329 return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor,
info);
335 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor,
info);
341 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor,
info);
347 return MakeWorkload<ClMaximumWorkload>(descriptor,
info);
353 return MakeWorkload<ClMeanWorkload>(descriptor,
info);
364 return MakeWorkload<CopyMemGenericWorkload>(descriptor,
info);
375 return std::make_unique<ImportMemGenericWorkload>(descriptor,
info);
387 return MakeWorkload<ClMinimumWorkload>(descriptor,
info);
393 return MakeWorkload<ClMultiplicationWorkload>(descriptor,
info);
399 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor,
info);
405 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
411 return MakeWorkload<ClPadWorkload>(descriptor,
info);
417 return MakeWorkload<ClPermuteWorkload>(descriptor,
info);
423 return MakeWorkload<ClPooling2dWorkload>(descriptor,
info);
429 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info);
435 return MakeWorkload<ClPreluWorkload>(descriptor,
info);
441 return MakeWorkload<ClQuantizeWorkload>(descriptor,
info);
447 return MakeWorkload<ClQuantizedLstmWorkload>(descriptor,
info);
453 return MakeWorkload<ClReshapeWorkload>(descriptor,
info);
459 return MakeWorkload<ClResizeWorkload>(descriptor,
info);
480 boost::ignore_unused(descriptor);
491 return MakeWorkload<ClSliceWorkload>(descriptor,
info);
497 return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor,
info,
498 m_MemoryManager->GetIntraLayerManager());
504 return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor,
info);
510 return MakeWorkload<ClSpaceToDepthWorkload>(descriptor,
info);
516 return MakeWorkload<ClSplitterWorkload>(descriptor,
info);
522 return MakeWorkload<ClStackWorkload>(descriptor,
info);
528 return MakeWorkload<ClStridedSliceWorkload>(descriptor,
info);
534 return MakeWorkload<ClSubtractionWorkload>(descriptor,
info);
541 return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
info, m_MemoryManager->GetIntraLayerManager());
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 > FloatWorkload
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const override
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
unsigned int GetNumDimensions() const
std::unique_ptr< IWorkload > CreateGreater(const GreaterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &descriptor, const WorkloadInfo &info) const override
LayerDescriptor m_Parameters
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateAbs(const AbsQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &descriptor, const WorkloadInfo &Info) const override
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &descriptor, const WorkloadInfo &info) const override
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
const BackendId & GetBackendId() const override
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
virtual TensorShape GetShape() const =0
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &descriptor, const WorkloadInfo &info) const override
TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 > Uint8Workload
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetWidth
Target width value.
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ComparisonOperation m_Operation
Specifies the comparison operation to execute.
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &descriptor, const WorkloadInfo &info) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::vector< ITensorHandle * > m_Outputs
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateMerger(const MergerQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ClWorkloadFactory(const std::shared_ptr< ClMemoryManager > &memoryManager)
std::unique_ptr< IWorkload > CreateEqual(const EqualQueueDescriptor &descriptor, const WorkloadInfo &info) const override
uint32_t m_TargetHeight
Target height value.
std::unique_ptr< IWorkload > CreateRsqrt(const RsqrtQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
A ComparisonDescriptor for the ComparisonLayer.
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &descriptor, const WorkloadInfo &info) const override
constexpr const char * ClBackendId()
std::unique_ptr< IWorkload > CreateResizeBilinear(const ResizeBilinearQueueDescriptor &descriptor, const WorkloadInfo &info) const override