diff options
author | Sadik Armagan <sadik.armagan@arm.com> | 2020-12-02 11:28:58 +0000 |
---|---|---|
committer | Sadik Armagan <sadik.armagan@arm.com> | 2020-12-02 11:29:35 +0000 |
commit | e9444751017fe108ce80fd5c270d04fffeb14e1e (patch) | |
tree | 29601fdf9017a2dbce71983b995709c24e6c773c /src/backends | |
parent | bbbefecd34a9420bcb003dd230402c55ee5150d5 (diff) | |
download | armnn-e9444751017fe108ce80fd5c270d04fffeb14e1e.tar.gz |
IVGCVSW-5482 'Add a ClCompileContext parameter to each ClWorkload Constructor'
* Injected a CLCompileContext object into each CL workload.
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I4837dbd3d5b56cf743b3b89c944e3cdf8b11a42a
Diffstat (limited to 'src/backends')
110 files changed, 543 insertions, 230 deletions
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 41b779f64a..35186f286a 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -107,9 +107,8 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptor void ClWorkloadFactory::InitializeCLCompileContext() { // Initialize our m_CLCompileContext using default device and context - cl::Device device = cl::Device::getDefault(); - cl::Context context = cl::Context(device); - + auto context = arm_compute::CLKernelLibrary::get().context(); + auto device = arm_compute::CLKernelLibrary::get().get_device(); m_CLCompileContext = arm_compute::CLCompileContext(context, device); if (m_ModelContextPtr) @@ -200,64 +199,64 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClActivationWorkload>(descriptor, info); + return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClAdditionWorkload>(descriptor, info); + return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return std::make_unique<ClArgMinMaxWorkload>(descriptor, info); + return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); + return 
MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info); + return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClComparisonWorkload>(descriptor, info); + return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClConcatWorkload>(descriptor, info); + return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClConstantWorkload>(descriptor, info); + return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32( const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info); + return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info); + return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& 
descriptor, @@ -278,45 +277,46 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolut return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager(), + m_CLCompileContext, isFastMathEnabled); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info); + return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info); + return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClDequantizeWorkload>(descriptor, info); + return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess( const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClDivisionFloatWorkload, 
NullWorkload>(descriptor, info); + return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor, @@ -330,22 +330,22 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const Eleme absQueueDescriptor.m_Inputs = descriptor.m_Inputs; absQueueDescriptor.m_Outputs = descriptor.m_Outputs; - return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info); + return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::Exp: - return std::make_unique<ClExpWorkload>(descriptor, info); + return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext); case UnaryOperation::Neg: - return std::make_unique<ClNegWorkload>(descriptor, info); + return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext); case UnaryOperation::Rsqrt: { RsqrtQueueDescriptor rsqrtQueueDescriptor; rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs; rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs; - return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info); + return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::LogicalNot: - return std::make_unique<ClLogicalNotWorkload>(descriptor, info); + return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext); default: return nullptr; } @@ -365,25 +365,28 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescri std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return std::make_unique<ClFillWorkload>(descriptor, info); + return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, const 
WorkloadInfo& info) const { - return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + return MakeWorkload<ClFullyConnectedWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClGatherWorkload>(descriptor, info); + return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor, @@ -407,13 +410,13 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInstanceNormalization( const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info); + return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor, @@ -422,9 +425,9 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalB switch(descriptor.m_Parameters.m_Operation) { case LogicalBinaryOperation::LogicalAnd: - return 
std::make_unique<ClLogicalAndWorkload>(descriptor, info); + return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext); case LogicalBinaryOperation::LogicalOr: - return std::make_unique<ClLogicalOrWorkload>(descriptor, info); + return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext); default: return nullptr; } @@ -433,25 +436,28 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalB std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + return MakeWorkload<ClLogSoftmaxWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClMaximumWorkload>(descriptor, info); + return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClMeanWorkload>(descriptor, info); + return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, @@ -485,19 +491,19 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDesc std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor, 
const WorkloadInfo& info) const { - return MakeWorkload<ClMinimumWorkload>(descriptor, info); + return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClMultiplicationWorkload>(descriptor, info); + return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, @@ -509,61 +515,61 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDesc std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClPadWorkload>(descriptor, info); + return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClPermuteWorkload>(descriptor, info); + return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClPooling2dWorkload>(descriptor, info); + return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, const WorkloadInfo& info) const { - 
return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const { - return MakeWorkload<ClPreluWorkload>(descriptor, info); + return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return std::make_unique<ClQLstmWorkload>(descriptor, info); + return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClQuantizeWorkload>(descriptor, info); + return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info); + return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClReshapeWorkload>(descriptor, info); + return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClResizeWorkload>(descriptor, info); + return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, @@ -595,62 +601,68 @@ 
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescri std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClSliceWorkload>(descriptor, info); + return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return std::make_unique<ClSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + return std::make_unique<ClSoftmaxWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info); + return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info); + return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClSplitterWorkload>(descriptor, info); + return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClStackWorkload>(descriptor, info); + return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor, const 
WorkloadInfo& info) const { - return MakeWorkload<ClStridedSliceWorkload>(descriptor, info); + return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClSubtractionWorkload>(descriptor, info); + return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClTransposeWorkload>(descriptor, info); + return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d( const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + m_CLCompileContext); } } // namespace armnn diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 4bd3d3a33d..765409a426 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -10,6 +10,8 @@ #include <armnn/utility/IgnoreUnused.hpp> #include <armnn/utility/PolymorphicDowncast.hpp> #include <backendsCommon/MemCopyWorkload.hpp> +#include <backendsCommon/test/TensorCopyUtils.hpp> +#include <backendsCommon/test/WorkloadTestUtils.hpp> #include <aclCommon/test/CreateWorkloadClNeon.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> @@ -334,6 +336,98 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload) ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == 
arm_compute::ConvolutionMethod::WINOGRAD); } +BOOST_AUTO_TEST_CASE(CreateConvolution2dClCompiledContextWorkload) +{ + using namespace armnn; + + const DataType inputType = DataType::QAsymmU8; + const DataType kernelType = DataType::QSymmS8; + const DataType biasType = DataType::Signed32; + + TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128); + TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128); + + const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f }; + constexpr unsigned int quantDimension = 0; + + TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension); + + const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f }; + TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension); + + std::vector<uint8_t> inputData = + { + 138, 108, 138, 108, 138, 108 + }; + + std::vector<int8_t> kernelData = + { + 1, 2, 1, 2, 1, 2 + }; + + std::vector<int32_t> biasData = + { + 4, 4, 4 + }; + + std::vector<uint8_t> expectedOutputData = + { + 121, 118, 115, 121, 118, 115, 121, 118, 115 + }; + + + Convolution2dDescriptor descriptor; + descriptor.m_StrideX = 1; + descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 0; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 0; + descriptor.m_PadBottom = 0; + descriptor.m_BiasEnabled = true; + descriptor.m_DataLayout = DataLayout::NHWC; + + auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager(); + auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager); + auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager); + + std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); + std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); + + + WorkloadInfo workloadInfo; + ScopedCpuTensorHandle weightTensor(kernelInfo); + ScopedCpuTensorHandle biasTensor(biasInfo); + + AllocateAndCopyDataToITensorHandle(&weightTensor, 
kernelData.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); + + Convolution2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = descriptor; + queueDescriptor.m_Weight = &weightTensor; + queueDescriptor.m_Bias = &biasTensor; + + AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get()); + + // Initialize our m_CLCompileContext using default device and context + auto context = arm_compute::CLKernelLibrary::get().context(); + auto device = arm_compute::CLKernelLibrary::get().get_device(); + auto clCompileContext = arm_compute::CLCompileContext(context, device); + + + + // Check built programs are empty in context + BOOST_TEST(clCompileContext.get_built_programs().empty()); + + auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor, + workloadInfo, + clMemoryManager->GetIntraLayerManager(), + clCompileContext); + ARMNN_ASSERT(workload != nullptr); + // Check built programs are not empty in context + BOOST_TEST(!clCompileContext.get_built_programs().empty()); +} + template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType> static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout) { diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp index 858ef5b46d..4682c646d1 100644 --- a/src/backends/cl/workloads/ClAbsWorkload.cpp +++ b/src/backends/cl/workloads/ClAbsWorkload.cpp @@ -24,7 +24,9 @@ arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorI return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput); } -ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info) +ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : 
BaseWorkload<AbsQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1); @@ -32,7 +34,7 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const Workloa arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_AbsLayer.configure(&input, &output); + m_AbsLayer.configure(clCompileContext, &input, &output); } void ClAbsWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClAbsWorkload.hpp b/src/backends/cl/workloads/ClAbsWorkload.hpp index 763cafcfbd..d0f7d16f41 100644 --- a/src/backends/cl/workloads/ClAbsWorkload.hpp +++ b/src/backends/cl/workloads/ClAbsWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorI class ClAbsWorkload : public BaseWorkload<AbsQueueDescriptor> { public: - ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info); + ClAbsWorkload(const AbsQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp index 685652036b..8997a9720d 100644 --- a/src/backends/cl/workloads/ClActivationWorkload.cpp +++ b/src/backends/cl/workloads/ClActivationWorkload.cpp @@ -30,7 +30,8 @@ arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, } ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ActivationQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1); @@ -40,7 +41,7 @@ ClActivationWorkload::ClActivationWorkload(const 
ActivationQueueDescriptor& desc arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_ActivationLayer.configure(&input, &output, activationLayerInfo); + m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo); } void ClActivationWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClActivationWorkload.hpp b/src/backends/cl/workloads/ClActivationWorkload.hpp index 35166332e6..6b71e8653a 100644 --- a/src/backends/cl/workloads/ClActivationWorkload.hpp +++ b/src/backends/cl/workloads/ClActivationWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, class ClActivationWorkload : public BaseWorkload<ActivationQueueDescriptor> { public: - ClActivationWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClActivationWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp index 7e75a04110..0ab7446026 100644 --- a/src/backends/cl/workloads/ClAdditionWorkload.cpp +++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp @@ -19,7 +19,8 @@ using namespace armcomputetensorutils; static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<AdditionQueueDescriptor>(descriptor, info) { this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); @@ -30,7 +31,7 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor 
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo); + m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo); } void ClAdditionWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp index 372c4bc6f7..cd25be12e7 100644 --- a/src/backends/cl/workloads/ClAdditionWorkload.hpp +++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp @@ -15,7 +15,9 @@ namespace armnn class ClAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor> { public: - ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp index 5910080859..8974930afa 100644 --- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp +++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp @@ -53,7 +53,8 @@ arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input, ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) { arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); @@ -69,7 +70,11 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descrip } else { - m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MIN); + m_ArgMinMaxLayer.configure(clCompileContext, + &input, + aclAxis, + &output, + 
arm_compute::ReductionOperation::ARG_IDX_MIN); } } diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp index 54f28e6175..3ec137d49e 100644 --- a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp +++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input, class ClArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor> { public: - ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info); + ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp index c595e20a1f..daaed17a90 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp @@ -52,7 +52,9 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, } ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) { m_Mean = std::make_unique<arm_compute::CLTensor>(); @@ -78,7 +80,8 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_Layer.configure(&input, + m_Layer.configure(clCompileContext, + &input, &output, m_Mean.get(), m_Variance.get(), diff --git 
a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp index ef5778309e..c9f1f7f295 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp @@ -25,7 +25,9 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> { public: - ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload; void Execute() const override; diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp index 1a7a8dca81..8978c5a66e 100644 --- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp +++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp @@ -18,8 +18,9 @@ namespace armnn using namespace armcomputetensorutils; ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc, - const WorkloadInfo& info) - : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info) { m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1); @@ -35,7 +36,7 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(&input, blockWidth, blockHeight, &output); + m_Layer.configure(clCompileContext, 
&input, blockWidth, blockHeight, &output); } void ClBatchToSpaceNdWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp index 881b294097..2262f33c73 100644 --- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp +++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input, class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor> { public: - ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info); + ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp index 30b336dd94..20e5669807 100644 --- a/src/backends/cl/workloads/ClComparisonWorkload.cpp +++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp @@ -39,7 +39,9 @@ arm_compute::Status ClComparisonWorkloadValidate(const TensorInfo& input0, return aclStatus; } -ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info) +ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ComparisonQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1); @@ -50,7 +52,7 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(m_Data.m_Parameters); - m_ComparisonLayer.configure(&input0, &input1, &output, comparisonOperation); + m_ComparisonLayer.configure(clCompileContext, 
&input0, &input1, &output, comparisonOperation); } void ClComparisonWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClComparisonWorkload.hpp b/src/backends/cl/workloads/ClComparisonWorkload.hpp index e842152fed..4a92e6b6dc 100644 --- a/src/backends/cl/workloads/ClComparisonWorkload.hpp +++ b/src/backends/cl/workloads/ClComparisonWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClComparisonWorkloadValidate(const TensorInfo& input0, class ClComparisonWorkload : public BaseWorkload<ComparisonQueueDescriptor> { public: - ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info); + ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp index da0d675232..3c5f23742a 100644 --- a/src/backends/cl/workloads/ClConcatWorkload.cpp +++ b/src/backends/cl/workloads/ClConcatWorkload.cpp @@ -11,7 +11,6 @@ #include <cl/ClLayerSupport.hpp> #include <arm_compute/core/Types.h> -#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h> namespace armnn { @@ -46,7 +45,9 @@ arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo* return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis); } -ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info) +ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ConcatQueueDescriptor>(descriptor, info) { bool allInputsAreSubtensors = true; @@ -83,7 +84,7 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, cons // Configure input and output tensors size_t aclAxis = CalcAxis(descriptor.m_Parameters); - 
layer->configure(aclInputs, &output, aclAxis); + layer->configure(clCompileContext, aclInputs, &output, aclAxis); // Prepare layer->prepare(); diff --git a/src/backends/cl/workloads/ClConcatWorkload.hpp b/src/backends/cl/workloads/ClConcatWorkload.hpp index 772bc094ea..3120b423b6 100644 --- a/src/backends/cl/workloads/ClConcatWorkload.hpp +++ b/src/backends/cl/workloads/ClConcatWorkload.hpp @@ -9,6 +9,7 @@ #include <arm_compute/core/Error.h> #include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h> namespace armnn { @@ -20,7 +21,9 @@ arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo* class ClConcatWorkload : public BaseWorkload<ConcatQueueDescriptor> { public: - ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info); + ClConcatWorkload(const ConcatQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp index d6b5c57a7e..40acb8ebd0 100644 --- a/src/backends/cl/workloads/ClConstantWorkload.cpp +++ b/src/backends/cl/workloads/ClConstantWorkload.cpp @@ -41,7 +41,9 @@ arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output) } } -ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) +ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext&) : BaseWorkload<ConstantQueueDescriptor>(descriptor, info) , m_RanOnce(false) { diff --git a/src/backends/cl/workloads/ClConstantWorkload.hpp b/src/backends/cl/workloads/ClConstantWorkload.hpp index e5a1d4410d..8fa5d632c2 100644 --- a/src/backends/cl/workloads/ClConstantWorkload.hpp +++ b/src/backends/cl/workloads/ClConstantWorkload.hpp @@ -8,6 +8,8 @@ #include 
<arm_compute/core/Error.h> #include <backendsCommon/Workload.hpp> +#include <arm_compute/core/CL/CLCompileContext.h> + namespace armnn { arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output); @@ -15,7 +17,9 @@ arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output); class ClConstantWorkload : public BaseWorkload<ConstantQueueDescriptor> { public: - ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info); + ClConstantWorkload(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp index d2e86f8c94..aaffd83741 100644 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp @@ -15,7 +15,9 @@ using namespace armcomputetensorutils; static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( - const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) { this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); @@ -23,7 +25,7 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); + m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); } void 
ClConvertFp16ToFp32Workload::Execute() const diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp index ef5c9b6497..e8e6c98014 100644 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp @@ -16,7 +16,9 @@ class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16T { public: - ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp index 3f528a1532..a9f1d91bcf 100644 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp @@ -15,7 +15,9 @@ using namespace armcomputetensorutils; static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( - const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) { this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); @@ -23,7 +25,7 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output, 
g_AclConvertPolicy, 0); + m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); } void ClConvertFp32ToFp16Workload::Execute() const diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp index 6e04e39425..17eac7d23b 100644 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp @@ -16,7 +16,9 @@ class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32T { public: - ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index 50cb9ded37..99a981bd0c 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -65,6 +65,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext, const bool isFastMathEnabled) : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info) , m_ConvolutionLayer(memoryManager) @@ -97,7 +98,8 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_ConvolutionLayer.configure(&input, + m_ConvolutionLayer.configure(clCompileContext, + &input, m_KernelTensor.get(), m_BiasTensor.get(), &output, 
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp index 70170b569d..d0f7a5b251 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp @@ -32,6 +32,7 @@ public: ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext, const bool isFastMathEnabled = false); void Execute() const override; diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp index 43c81dc209..d42b261a10 100644 --- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp @@ -37,7 +37,8 @@ arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input, } ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info) { m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1); @@ -54,7 +55,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor(); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(&input, &output, blockSize); + m_Layer.configure(clCompileContext, &input, &output, blockSize); } void ClDepthToSpaceWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp index de8b496669..6cb8bb5e9e 100644 --- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp +++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp @@ -21,7 +21,9 @@ arm_compute::Status 
ClDepthToSpaceWorkloadValidate(const TensorInfo& input, class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor> { public: - ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info); + ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp index 53f16848eb..655f0c9c35 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp @@ -75,7 +75,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { // Allocate a buffer for the swizzling of the weight tensor @@ -124,6 +125,7 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>(); static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure( + clCompileContext, &input, m_KernelTensor.get(), m_BiasTensor.get(), diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp index c75913737d..d490012cd9 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp @@ -27,7 +27,8 @@ public: using BaseWorkload<DepthwiseConvolution2dQueueDescriptor>::m_Data; 
ClDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp index eb63900380..52d8fab93c 100644 --- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp +++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp @@ -28,7 +28,8 @@ arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const } ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, - const WorkloadInfo& workloadInfo) + const WorkloadInfo& workloadInfo, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<DequantizeQueueDescriptor>(descriptor, workloadInfo) { m_Data.ValidateInputsOutputs("ClDequantizeWorkload", 1, 1); @@ -40,7 +41,7 @@ ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& desc m_Data.m_Outputs[0])->GetTensor(); m_Layer.reset(new arm_compute::CLDequantizationLayer()); - m_Layer->configure(&input, &output); + m_Layer->configure(clCompileContext, &input, &output); m_Layer->prepare(); } diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.hpp b/src/backends/cl/workloads/ClDequantizeWorkload.hpp index 6e61da2ebf..628ea20f1b 100644 --- a/src/backends/cl/workloads/ClDequantizeWorkload.hpp +++ b/src/backends/cl/workloads/ClDequantizeWorkload.hpp @@ -17,7 +17,9 @@ arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const class ClDequantizeWorkload : public BaseWorkload<DequantizeQueueDescriptor> { public: - ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& workloadInfo); + ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, + const WorkloadInfo& workloadInfo, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const 
override; diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp index c79e55ebdd..3df8d52f6d 100644 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp @@ -32,7 +32,8 @@ arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<DivisionQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); @@ -43,7 +44,7 @@ ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_ArithmeticDivision.configure(&input0, &input1, &output, activationInfo); + m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo); } void ClDivisionFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp index 71d27ed5b5..481b8b0a89 100644 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp @@ -20,8 +20,9 @@ arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor> { public: - ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const - WorkloadInfo& info); + ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload; void Execute() const override; diff --git 
a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp index abf4181286..60c383f8bf 100644 --- a/src/backends/cl/workloads/ClExpWorkload.cpp +++ b/src/backends/cl/workloads/ClExpWorkload.cpp @@ -23,7 +23,9 @@ arm_compute::Status ClExpWorkloadValidate(const TensorInfo& input, const TensorI return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput); } -ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) +ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1); @@ -31,7 +33,7 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_ExpLayer.configure(&input, &output); + m_ExpLayer.configure(clCompileContext, &input, &output); } void ClExpWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClExpWorkload.hpp b/src/backends/cl/workloads/ClExpWorkload.hpp index c35aebbeb9..407fb5ed1d 100644 --- a/src/backends/cl/workloads/ClExpWorkload.hpp +++ b/src/backends/cl/workloads/ClExpWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClExpWorkloadValidate(const TensorInfo& input, const TensorI class ClExpWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor> { public: - ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); + ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git 
a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp index 47a70bc677..a2204fa42d 100644 --- a/src/backends/cl/workloads/ClFillWorkload.cpp +++ b/src/backends/cl/workloads/ClFillWorkload.cpp @@ -15,7 +15,9 @@ namespace armnn { using namespace armcomputetensorutils; -ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info) +ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<FillQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1); @@ -23,7 +25,7 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, const Work arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); arm_compute::PixelValue pixelValue = GetPixelValue(output, descriptor.m_Parameters.m_Value); - m_Layer.configure(&output, pixelValue); + m_Layer.configure(clCompileContext, &output, pixelValue); } void ClFillWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClFillWorkload.hpp b/src/backends/cl/workloads/ClFillWorkload.hpp index 8919445d0c..8539501f17 100644 --- a/src/backends/cl/workloads/ClFillWorkload.hpp +++ b/src/backends/cl/workloads/ClFillWorkload.hpp @@ -14,7 +14,9 @@ namespace armnn { class ClFillWorkload : public BaseWorkload<FillQueueDescriptor> { public: - ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info); + ClFillWorkload(const FillQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp index f38342ed39..3915270c24 100644 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp @@ 
-20,7 +20,9 @@ arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input, return arm_compute::CLFloor::validate(&aclInput, &aclOutput); } -ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) +ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<FloorQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); @@ -28,7 +30,7 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); + m_Layer.configure(clCompileContext, &input, &output); } void ClFloorFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp index 1ddaddff0b..1c755c5b57 100644 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input, class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> { public: - ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp index eaec639f28..9135d27376 100644 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp @@ -45,8 +45,11 @@ 
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, fullyConnectedLayerInfo); } -ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) +ClFullyConnectedWorkload::ClFullyConnectedWorkload( + const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) , m_FullyConnectedLayer(memoryManager) { @@ -69,7 +72,12 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDesc arm_compute::FullyConnectedLayerInfo fc_info = ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo); - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + m_FullyConnectedLayer.configure(clCompileContext, + &input, + m_WeightsTensor.get(), + m_BiasesTensor.get(), + &output, + fc_info); InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp index 311b59498b..45394da97f 100644 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp @@ -27,7 +27,8 @@ class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnecte public: ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, const armnn::WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext); using 
armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data; void Execute() const override; diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp index c76b9c7a17..98dfe7bc81 100644 --- a/src/backends/cl/workloads/ClGatherWorkload.cpp +++ b/src/backends/cl/workloads/ClGatherWorkload.cpp @@ -27,7 +27,8 @@ arm_compute::Status ClGatherWorkloadValidate(const TensorInfo& input, } ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<GatherQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1); @@ -38,7 +39,7 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor, int aclAxis = ComputeAclAxis(descriptor.m_Parameters.m_Axis, info.m_InputTensorInfos[0]); - m_Layer.configure(&input, &indices, &output, aclAxis); + m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis); }; void ClGatherWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClGatherWorkload.hpp b/src/backends/cl/workloads/ClGatherWorkload.hpp index df71a99fa0..8199aaf338 100644 --- a/src/backends/cl/workloads/ClGatherWorkload.hpp +++ b/src/backends/cl/workloads/ClGatherWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClGatherWorkloadValidate(const TensorInfo& input, class ClGatherWorkload : public BaseWorkload<GatherQueueDescriptor> { public: - ClGatherWorkload(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info); + ClGatherWorkload(const GatherQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp index 50cf345a7f..a0e8e7b87d 100644 --- 
a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp +++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp @@ -31,7 +31,8 @@ arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& in ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload( const InstanceNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1); @@ -43,7 +44,8 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(&input, + m_Layer.configure(clCompileContext, + &input, &output, descriptor.m_Parameters.m_Gamma, descriptor.m_Parameters.m_Beta, diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp index 0e37bdcc9b..957ba34685 100644 --- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp +++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& in class ClInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizationQueueDescriptor> { public: - ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp index e1a61277d5..bd38219a3e 100644 --- 
a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp @@ -27,7 +27,8 @@ arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, } ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); @@ -41,7 +42,7 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0; - m_Layer.configure(&input, &output, axis, m_Data.m_Parameters.m_Eps); + m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps); } void ClL2NormalizationFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp index 26aea9fd88..8648da4492 100644 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> { public: - ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp index 6d53523291..1a255f13f6 100644 --- 
a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp +++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp @@ -25,8 +25,10 @@ arm_compute::Status ClLogSoftmaxWorkloadValidate(const TensorInfo& input, return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis); } -ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) +ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info) , m_LogSoftmaxLayer(memoryManager) { @@ -36,7 +38,7 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]); - m_LogSoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis); + m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis); } void ClLogSoftmaxWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp index 9b531add19..a2835887e0 100644 --- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp +++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp @@ -25,7 +25,8 @@ class ClLogSoftmaxWorkload : public BaseWorkload<LogSoftmaxQueueDescriptor> { public: ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& 
clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp index 9418d73c23..f04cede2f8 100644 --- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp @@ -32,7 +32,8 @@ arm_compute::Status ClLogicalAndWorkloadValidate(const TensorInfo& input0, } ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1); @@ -41,7 +42,7 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalAndLayer.configure(&input0, &input1, &output); + m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output); } void ClLogicalAndWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp index 3bf6afe9d4..c7cbf5a7cc 100644 --- a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp +++ b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClLogicalAndWorkloadValidate(const TensorInfo& input0, class ClLogicalAndWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor> { public: - ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info); + ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git 
a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp index eb90cafd1c..475e57f8dc 100644 --- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp @@ -29,7 +29,8 @@ arm_compute::Status ClLogicalNotWorkloadValidate(const TensorInfo& input, } ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1); @@ -37,7 +38,7 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalNotLayer.configure(&input, &output); + m_LogicalNotLayer.configure(clCompileContext, &input, &output); } void ClLogicalNotWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp index f1225c7ba7..9c6c3d15a6 100644 --- a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp +++ b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClLogicalNotWorkloadValidate(const TensorInfo& input, const class ClLogicalNotWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor> { public: - ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); + ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp 
b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp index e9895bfc39..355310ef5a 100644 --- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp @@ -32,7 +32,8 @@ arm_compute::Status ClLogicalOrWorkloadValidate(const TensorInfo& input0, } ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1); @@ -41,7 +42,7 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalOrLayer.configure(&input0, &input1, &output); + m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output); } void ClLogicalOrWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp index 8faabde90a..a6db9908bb 100644 --- a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp +++ b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClLogicalOrWorkloadValidate(const TensorInfo& input0, class ClLogicalOrWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor> { public: - ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info); + ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp index fe9b45e054..58cc735704 100644 
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp @@ -19,7 +19,9 @@ namespace armnn { using namespace armcomputetensorutils; -ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) +ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, + const WorkloadInfo &info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<LstmQueueDescriptor>(descriptor, info) { arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param; @@ -185,11 +187,12 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, throw armnn::Exception("Wrong Type of Activation Function!"); } - m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), - m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), - m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), - m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), - &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, + m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(), + m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), + m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(), + m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(), + m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in, + &cell_state_in, m_ScratchBuffer.get(), &output_state_out, &cell_state_out, &output, lstm_param, activationLayerInfo, cell_threshold, projection_threshold); diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp index b7cb408b40..b0847503d9 100644 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp @@ -18,7 +18,9 @@ namespace 
armnn class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> { public: - ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp index cd3192d186..85bea47f21 100644 --- a/src/backends/cl/workloads/ClMaximumWorkload.cpp +++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp @@ -37,7 +37,8 @@ arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0, } ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<MaximumQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClMaximumWorkload", 2, 1); @@ -46,7 +47,7 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_MaximumLayer.configure(&input0, &input1, &output); + m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output); } void ClMaximumWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMaximumWorkload.hpp b/src/backends/cl/workloads/ClMaximumWorkload.hpp index 18f67cddf6..f6681c79a3 100644 --- a/src/backends/cl/workloads/ClMaximumWorkload.hpp +++ b/src/backends/cl/workloads/ClMaximumWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0, class ClMaximumWorkload : public BaseWorkload<MaximumQueueDescriptor> { public: - ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info); + 
ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp index 470b6a883d..4cc0f7c1c2 100644 --- a/src/backends/cl/workloads/ClMeanWorkload.cpp +++ b/src/backends/cl/workloads/ClMeanWorkload.cpp @@ -28,7 +28,9 @@ arm_compute::Status ClMeanValidate(const TensorInfo& input, return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo); } -ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) +ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<MeanQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1); @@ -40,7 +42,7 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const Work info.m_InputTensorInfos[0].GetNumDimensions(), m_Data.m_Parameters.m_Axis); - m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output); + m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output); } void ClMeanWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp index 127c0548b1..04e9fe23f2 100644 --- a/src/backends/cl/workloads/ClMeanWorkload.hpp +++ b/src/backends/cl/workloads/ClMeanWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClMeanValidate(const TensorInfo& input, class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor> { public: - ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info); + ClMeanWorkload(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() 
const override; diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp index 5f8dfdb8eb..07a78b5008 100644 --- a/src/backends/cl/workloads/ClMinimumWorkload.cpp +++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp @@ -37,7 +37,8 @@ arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0, } ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<MinimumQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClMinimumWorkload", 2, 1); @@ -46,7 +47,7 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_MinimumLayer.configure(&input0, &input1, &output); + m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output); } void ClMinimumWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMinimumWorkload.hpp b/src/backends/cl/workloads/ClMinimumWorkload.hpp index 55d7eea9dd..34e7bb8d14 100644 --- a/src/backends/cl/workloads/ClMinimumWorkload.hpp +++ b/src/backends/cl/workloads/ClMinimumWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0, class ClMinimumWorkload : public BaseWorkload<MinimumQueueDescriptor> { public: - ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, const WorkloadInfo& info); + ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp index 46a1c4bc59..31e9d022cc 100644 --- 
a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp +++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp @@ -45,7 +45,8 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<MultiplicationQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClMultiplicationWorkload", 2, 1); @@ -62,7 +63,8 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); // Construct - m_PixelWiseMultiplication.configure(&input0, + m_PixelWiseMultiplication.configure(clCompileContext, + &input0, &input1, &output, 1.0f, diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp index 461449cc35..424f3d7045 100644 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp +++ b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, class ClMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor> { public: - ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); using BaseWorkload<MultiplicationQueueDescriptor>::BaseWorkload; void Execute() const override; diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp index 27886624b0..7505ab608a 100644 --- a/src/backends/cl/workloads/ClNegWorkload.cpp +++ b/src/backends/cl/workloads/ClNegWorkload.cpp @@ 
-23,7 +23,9 @@ arm_compute::Status ClNegWorkloadValidate(const TensorInfo& input, const TensorI return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput); } -ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) +ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClNegWorkload", 1, 1); @@ -31,7 +33,7 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_NegLayer.configure(&input, &output); + m_NegLayer.configure(clCompileContext, &input, &output); } void ClNegWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClNegWorkload.hpp b/src/backends/cl/workloads/ClNegWorkload.hpp index 9dbfa07665..7ee35cb87a 100644 --- a/src/backends/cl/workloads/ClNegWorkload.hpp +++ b/src/backends/cl/workloads/ClNegWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClNegWorkloadValidate(const TensorInfo& input, const TensorI class ClNegWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor> { public: - ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info); + ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp index 5f2fd7ab83..290d29ae52 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp +++ 
b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp @@ -29,7 +29,8 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, } ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); @@ -43,7 +44,7 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); - m_NormalizationLayer.configure(&input, &output, normalizationInfo); + m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo); }; void ClNormalizationFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp index a6d4f25e5e..dd309b44c2 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> { public: - ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp index 8a8c34a212..533855c295 100644 --- a/src/backends/cl/workloads/ClPadWorkload.cpp +++ b/src/backends/cl/workloads/ClPadWorkload.cpp @@ -15,7 +15,9 @@ 
namespace armnn { using namespace armcomputetensorutils; -ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) +ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<PadQueueDescriptor>(descriptor, info) { this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1); @@ -33,7 +35,7 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const Workloa arm_compute::PixelValue pixelValue = GetPixelValue(input, descriptor.m_Parameters.m_PadValue); - m_Layer.configure(&input, &output, padList, pixelValue); + m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue); } void ClPadWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp index e87cbb71c2..ac9b4b7c65 100644 --- a/src/backends/cl/workloads/ClPadWorkload.hpp +++ b/src/backends/cl/workloads/ClPadWorkload.hpp @@ -14,7 +14,9 @@ namespace armnn { class ClPadWorkload : public BaseWorkload<PadQueueDescriptor> { public: - ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info); + ClPadWorkload(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp index 41bce1d4fa..5aadc7629e 100644 --- a/src/backends/cl/workloads/ClPermuteWorkload.cpp +++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp @@ -27,7 +27,8 @@ arm_compute::Status ClPermuteWorkloadValidate(const TensorInfo& input, } ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<PermuteQueueDescriptor>(descriptor, info) { using 
armcomputetensorutils::BuildArmComputePermutationVector; @@ -39,7 +40,7 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; // Run the layer. - m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); + m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings)); } void ClPermuteWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp index 8b5f4c6147..2df2b26a11 100644 --- a/src/backends/cl/workloads/ClPermuteWorkload.hpp +++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp @@ -29,7 +29,9 @@ public: return name; } - ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp index 683880439f..c7cc10218a 100644 --- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp +++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp @@ -28,7 +28,9 @@ arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, } ClPooling2dWorkload::ClPooling2dWorkload( - const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) + const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1); @@ -48,7 +50,7 @@ ClPooling2dWorkload::ClPooling2dWorkload( arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision); // Run the layer. 
- m_PoolingLayer.configure(&input, &output, layerInfo); + m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo); } void ClPooling2dWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.hpp b/src/backends/cl/workloads/ClPooling2dWorkload.hpp index ce67db2a13..f4117aca2e 100644 --- a/src/backends/cl/workloads/ClPooling2dWorkload.hpp +++ b/src/backends/cl/workloads/ClPooling2dWorkload.hpp @@ -22,7 +22,8 @@ public: using BaseWorkload<Pooling2dQueueDescriptor>::m_Data; ClPooling2dWorkload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info); + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp index 1813105c21..73fa887532 100644 --- a/src/backends/cl/workloads/ClPreluWorkload.cpp +++ b/src/backends/cl/workloads/ClPreluWorkload.cpp @@ -27,7 +27,8 @@ arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input, } ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<PreluQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClPreluWorkload", 1, 1); @@ -36,7 +37,7 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor, arm_compute::ICLTensor& alpha = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_PreluLayer.configure(&input, &alpha, &output); + m_PreluLayer.configure(clCompileContext, &input, &alpha, &output); } void ClPreluWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPreluWorkload.hpp b/src/backends/cl/workloads/ClPreluWorkload.hpp index 9061416431..ac8932c9d1 100644 --- 
a/src/backends/cl/workloads/ClPreluWorkload.hpp +++ b/src/backends/cl/workloads/ClPreluWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input, class ClPreluWorkload : public BaseWorkload<PreluQueueDescriptor> { public: - ClPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info); + ClPreluWorkload(const PreluQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp index 7ece05f5ff..0ae371575b 100644 --- a/src/backends/cl/workloads/ClQLstmWorkload.cpp +++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp @@ -14,7 +14,9 @@ namespace armnn { using namespace armcomputetensorutils; -ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info) +ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<QLstmQueueDescriptor>(descriptor, info) { arm_compute::LSTMParams<arm_compute::ICLTensor> qLstmParams; @@ -150,8 +152,9 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const W m_Data.m_Parameters.m_CellIntermediateScale, m_Data.m_Parameters.m_OutputIntermediateScale); - // QLSTM NEON configure - m_QLstmLayer.configure(&input, + // QLSTM CL configure + m_QLstmLayer.configure(clCompileContext, + &input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), diff --git a/src/backends/cl/workloads/ClQLstmWorkload.hpp b/src/backends/cl/workloads/ClQLstmWorkload.hpp index f98c9b3f9a..6758abcde9 100644 --- a/src/backends/cl/workloads/ClQLstmWorkload.hpp +++ b/src/backends/cl/workloads/ClQLstmWorkload.hpp @@ -19,7 +19,9 @@ namespace armnn class ClQLstmWorkload : public 
BaseWorkload<QLstmQueueDescriptor> { public: - ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info); + ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp index 263065a5a4..5c945e0921 100644 --- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp +++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp @@ -29,7 +29,9 @@ arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input, &aclOutputInfo); } -ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info) +ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<QuantizeQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClQuantizeWorkload", 1, 1); @@ -37,7 +39,7 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); + m_Layer.configure(clCompileContext, &input, &output); } void ClQuantizeWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.hpp b/src/backends/cl/workloads/ClQuantizeWorkload.hpp index f4a7ec64dd..9bb1572c5c 100644 --- a/src/backends/cl/workloads/ClQuantizeWorkload.hpp +++ b/src/backends/cl/workloads/ClQuantizeWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input, class ClQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor> { public: - ClQuantizeWorkload(const QuantizeQueueDescriptor& 
descriptor, const WorkloadInfo& info); + ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp index 688ebf9184..636bdecbeb 100644 --- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp +++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp @@ -62,7 +62,8 @@ arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, con } ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor &descriptor, - const WorkloadInfo &info): + const WorkloadInfo &info, + const arm_compute::CLCompileContext& clCompileContext): BaseWorkload<QuantizedLstmQueueDescriptor>(descriptor, info) { m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>(); @@ -108,7 +109,8 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip arm_compute::ICLTensor& cellStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor(); - m_QuantizedLstmLayer.configure(&inputTensor, m_InputToInputWeightsTensor.get(), m_InputToForgetWeightsTensor.get(), + m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(), + m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp index 580db490d6..6561850d79 100644 --- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp +++ 
b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp @@ -22,7 +22,9 @@ arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, con class ClQuantizedLstmWorkload : public BaseWorkload<QuantizedLstmQueueDescriptor> { public: - ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info); + ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp index d752290444..0988babf23 100644 --- a/src/backends/cl/workloads/ClReshapeWorkload.cpp +++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp @@ -21,7 +21,9 @@ arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input, return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo); } -ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) +ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<ReshapeQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClReshapeWorkload", 1, 1); @@ -29,7 +31,7 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, c arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); + m_Layer.configure(clCompileContext, &input, &output); } void ClReshapeWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClReshapeWorkload.hpp b/src/backends/cl/workloads/ClReshapeWorkload.hpp index d836f1e43a..70d72879f5 100644 --- a/src/backends/cl/workloads/ClReshapeWorkload.hpp +++ 
b/src/backends/cl/workloads/ClReshapeWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input, class ClReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor> { public: - ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp index 744a915616..e47740624e 100644 --- a/src/backends/cl/workloads/ClResizeWorkload.cpp +++ b/src/backends/cl/workloads/ClResizeWorkload.cpp @@ -46,8 +46,10 @@ arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input, descriptor.m_AlignCorners)); } -ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) : - BaseWorkload<ResizeQueueDescriptor>(descriptor, info) +ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : BaseWorkload<ResizeQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1); @@ -65,7 +67,8 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, cons ? 
arm_compute::SamplingPolicy::CENTER : arm_compute::SamplingPolicy::TOP_LEFT; - m_ResizeLayer.configure(&input, + m_ResizeLayer.configure(clCompileContext, + &input, &output, arm_compute::ScaleKernelInfo(aclInterpolationPolicy, arm_compute::BorderMode::REPLICATE, diff --git a/src/backends/cl/workloads/ClResizeWorkload.hpp b/src/backends/cl/workloads/ClResizeWorkload.hpp index ab5b943457..9549a32165 100644 --- a/src/backends/cl/workloads/ClResizeWorkload.hpp +++ b/src/backends/cl/workloads/ClResizeWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input, class ClResizeWorkload : public BaseWorkload<ResizeQueueDescriptor> { public: - ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info); + ClResizeWorkload(const ResizeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp index 48fd1e0fd0..a3a04c11eb 100644 --- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp +++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp @@ -23,7 +23,9 @@ arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const Tenso return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput); } -ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) +ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<RsqrtQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1); @@ -31,7 +33,7 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const W arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = 
PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_RsqrtLayer.configure(&input, &output); + m_RsqrtLayer.configure(clCompileContext, &input, &output); } void ClRsqrtWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.hpp b/src/backends/cl/workloads/ClRsqrtWorkload.hpp index 8fb6229160..35f84146bb 100644 --- a/src/backends/cl/workloads/ClRsqrtWorkload.hpp +++ b/src/backends/cl/workloads/ClRsqrtWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const Tenso class ClRsqrtWorkload : public BaseWorkload<RsqrtQueueDescriptor> { public: - ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info); + ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp index d7b1dbbe48..16271961f9 100644 --- a/src/backends/cl/workloads/ClSliceWorkload.cpp +++ b/src/backends/cl/workloads/ClSliceWorkload.cpp @@ -30,7 +30,9 @@ arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input, return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends); } -ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info) +ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<SliceQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1); @@ -43,7 +45,7 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const W std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size); - m_SliceFunction.configure(&input, &output, starts, ends); + 
m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends); } void ClSliceWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClSliceWorkload.hpp b/src/backends/cl/workloads/ClSliceWorkload.hpp index 3460b7788b..67836c2b5c 100644 --- a/src/backends/cl/workloads/ClSliceWorkload.hpp +++ b/src/backends/cl/workloads/ClSliceWorkload.hpp @@ -20,7 +20,9 @@ arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input, class ClSliceWorkload : public BaseWorkload<SliceQueueDescriptor> { public: - ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info); + ClSliceWorkload(const SliceQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp index 8bc2a765ed..4547c682c9 100644 --- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp +++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp @@ -25,8 +25,10 @@ arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis); } -ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) +ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info) , m_SoftmaxLayer(memoryManager) { @@ -36,7 +38,7 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, c arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); int aclAxis = 
ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]); - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis); + m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis); } void ClSoftmaxWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp index 158bf46c32..1742c60511 100644 --- a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp +++ b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp @@ -23,8 +23,10 @@ arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, class ClSoftmaxWorkload : public BaseWorkload<SoftmaxQueueDescriptor> { public: - ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp index 443c56b7b5..7b29cded0f 100644 --- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp +++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp @@ -45,7 +45,9 @@ arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input, } ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload( - const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info) + const SpaceToBatchNdQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1); @@ -68,7 +70,8 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload( 
input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - m_SpaceToBatchLayer.configure(&input, + m_SpaceToBatchLayer.configure(clCompileContext, + &input, blockWidth, blockHeight, paddingLeftTop, diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp index 7500b5a3ac..06d243a6d1 100644 --- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp +++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp @@ -22,7 +22,9 @@ arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input, class ClSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescriptor> { public: - ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info); + ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp index f35fe0e3c9..7a590d26b6 100644 --- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp +++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp @@ -18,7 +18,8 @@ namespace armnn using namespace armcomputetensorutils; ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info) { m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1); @@ -33,7 +34,7 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(&input, &output, blockSize); + 
m_Layer.configure(clCompileContext, &input, &output, blockSize); } void ClSpaceToDepthWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp index 57ce5d4d05..b782bbe24d 100644 --- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp +++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp @@ -19,7 +19,9 @@ arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input, class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor> { public: - ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info); + ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp index 045fbb7595..70a817825e 100644 --- a/src/backends/cl/workloads/ClSplitterWorkload.cpp +++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp @@ -9,7 +9,6 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> #include <aclCommon/ArmComputeUtils.hpp> -#include <arm_compute/runtime/CL/functions/CLSplit.h> #include <armnn/utility/PolymorphicDowncast.hpp> #include <backendsCommon/CpuTensorHandle.hpp> #include <cl/ClTensorHandle.hpp> @@ -53,7 +52,9 @@ arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input, return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis); } -ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info) +ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext&) : BaseWorkload<SplitterQueueDescriptor>(descriptor, info) { bool allOutputsAreSubtensors = true; diff --git 
a/src/backends/cl/workloads/ClSplitterWorkload.hpp b/src/backends/cl/workloads/ClSplitterWorkload.hpp index 82211f5baf..c59aa023bf 100644 --- a/src/backends/cl/workloads/ClSplitterWorkload.hpp +++ b/src/backends/cl/workloads/ClSplitterWorkload.hpp @@ -9,6 +9,7 @@ #include <arm_compute/core/Error.h> #include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/CL/functions/CLSplit.h> #include <functional> @@ -22,7 +23,9 @@ arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input, class ClSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor> { public: - ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info); + ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp index c0b88b1193..749282f53a 100644 --- a/src/backends/cl/workloads/ClStackWorkload.cpp +++ b/src/backends/cl/workloads/ClStackWorkload.cpp @@ -44,7 +44,9 @@ arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*> return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo); } -ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) +ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<StackQueueDescriptor>(descriptor, info) { std::vector<arm_compute::ICLTensor*> aclInputs; @@ -58,7 +60,7 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const W m_Layer.reset(new arm_compute::CLStackLayer()); int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions()); - m_Layer->configure(aclInputs, aclAxis, &output); + 
m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output); } void ClStackWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClStackWorkload.hpp b/src/backends/cl/workloads/ClStackWorkload.hpp index f27d6cdad0..3f1e642f60 100644 --- a/src/backends/cl/workloads/ClStackWorkload.hpp +++ b/src/backends/cl/workloads/ClStackWorkload.hpp @@ -18,7 +18,9 @@ arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*> class ClStackWorkload : public BaseWorkload<StackQueueDescriptor> { public: - ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info); + ClStackWorkload(const StackQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp index b094a910f4..92e860fc42 100644 --- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp +++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp @@ -53,7 +53,8 @@ arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input, } ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1); @@ -78,7 +79,8 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - m_StridedSliceLayer.configure(&input, + m_StridedSliceLayer.configure(clCompileContext, + &input, &output, starts, ends, diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp index bce3fe13eb..1229599847 100644 --- 
a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp +++ b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp @@ -22,7 +22,9 @@ arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input, class ClStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor> { public: - ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info); + ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp index 865dceb869..31e0becfd8 100644 --- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp +++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp @@ -19,7 +19,8 @@ using namespace armcomputetensorutils; static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<SubtractionQueueDescriptor>(descriptor, info) { this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); @@ -30,7 +31,7 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo); + m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo); } void ClSubtractionWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp index 9f51de645b..28440b0938 100644 --- 
a/src/backends/cl/workloads/ClSubtractionWorkload.hpp +++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp @@ -15,7 +15,9 @@ namespace armnn class ClSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor> { public: - ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); + ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp index 20b2104c62..ff0fd5c168 100644 --- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp @@ -56,7 +56,8 @@ arm_compute::Status ClTransposeConvolution2dWorkloadValidate(const TensorInfo& i ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload( const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info), m_Layer(memoryManager) { @@ -82,7 +83,7 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload( output.info()->set_data_layout(aclDataLayout); arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); - m_Layer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo); + m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo); InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); if (m_BiasesTensor) diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp 
b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp index b7320bf6ce..8a24e6d391 100644 --- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp @@ -29,7 +29,8 @@ class ClTransposeConvolution2dWorkload : public BaseWorkload<TransposeConvolutio public: ClTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp index b276b229f6..7ef502eb8d 100644 --- a/src/backends/cl/workloads/ClTransposeWorkload.cpp +++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp @@ -27,7 +27,8 @@ arm_compute::Status ClTransposeWorkloadValidate(const TensorInfo& input, } ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload<TransposeQueueDescriptor>(descriptor, info) { m_Data.ValidateInputsOutputs(GetName(), 1, 1); @@ -36,7 +37,9 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descrip arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; // Run the layer. 
- m_PermuteFunction.configure(&input, &output, + m_PermuteFunction.configure(clCompileContext, + &input, + &output, armcomputetensorutils::BuildArmComputeTransposeVector(mappings)); } diff --git a/src/backends/cl/workloads/ClTransposeWorkload.hpp b/src/backends/cl/workloads/ClTransposeWorkload.hpp index c1bed93b97..4677bdc3f5 100644 --- a/src/backends/cl/workloads/ClTransposeWorkload.hpp +++ b/src/backends/cl/workloads/ClTransposeWorkload.hpp @@ -29,7 +29,9 @@ public: return name; } - ClTransposeWorkload(const TransposeQueueDescriptor& descriptor, const WorkloadInfo& info); + ClTransposeWorkload(const TransposeQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; private: |