about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sadik Armagan <sadik.armagan@arm.com> 2020-12-02 11:28:58 +0000
committer Sadik Armagan <sadik.armagan@arm.com> 2020-12-02 11:29:35 +0000
commit e9444751017fe108ce80fd5c270d04fffeb14e1e (patch)
tree 29601fdf9017a2dbce71983b995709c24e6c773c
parent bbbefecd34a9420bcb003dd230402c55ee5150d5 (diff)
download armnn-e9444751017fe108ce80fd5c270d04fffeb14e1e.tar.gz
IVGCVSW-5482 'Add a ClCompileContext parameter to each ClWorkload Constructor'
* Injected a CLCompileContext object into each CL workload.

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I4837dbd3d5b56cf743b3b89c944e3cdf8b11a42a
-rw-r--r--src/backends/cl/ClWorkloadFactory.cpp130
-rw-r--r--src/backends/cl/test/ClCreateWorkloadTests.cpp94
-rw-r--r--src/backends/cl/workloads/ClAbsWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClAbsWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClActivationWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClActivationWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClAdditionWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClAdditionWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClArgMinMaxWorkload.cpp9
-rw-r--r--src/backends/cl/workloads/ClArgMinMaxWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp7
-rw-r--r--src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp7
-rw-r--r--src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClComparisonWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClComparisonWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClConcatWorkload.cpp7
-rw-r--r--src/backends/cl/workloads/ClConcatWorkload.hpp5
-rw-r--r--src/backends/cl/workloads/ClConstantWorkload.cpp4
-rw-r--r--src/backends/cl/workloads/ClConstantWorkload.hpp6
-rw-r--r--src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp6
-rw-r--r--src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp4
-rw-r--r--src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp6
-rw-r--r--src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp4
-rw-r--r--src/backends/cl/workloads/ClConvolution2dWorkload.cpp4
-rw-r--r--src/backends/cl/workloads/ClConvolution2dWorkload.hpp1
-rw-r--r--src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp4
-rw-r--r--src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp3
-rw-r--r--src/backends/cl/workloads/ClDequantizeWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClDequantizeWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClDivisionFloatWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClDivisionFloatWorkload.hpp5
-rw-r--r--src/backends/cl/workloads/ClExpWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClExpWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClFillWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClFillWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClFloorFloatWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClFloorFloatWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClFullyConnectedWorkload.cpp14
-rw-r--r--src/backends/cl/workloads/ClFullyConnectedWorkload.hpp3
-rw-r--r--src/backends/cl/workloads/ClGatherWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClGatherWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp8
-rw-r--r--src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp3
-rw-r--r--src/backends/cl/workloads/ClLogicalAndWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClLogicalAndWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClLogicalNotWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClLogicalNotWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClLogicalOrWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClLogicalOrWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClLstmFloatWorkload.cpp15
-rw-r--r--src/backends/cl/workloads/ClLstmFloatWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClMaximumWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClMaximumWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClMeanWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClMeanWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClMinimumWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClMinimumWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClMultiplicationWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClMultiplicationWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClNegWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClNegWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClPadWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClPadWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClPermuteWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClPermuteWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClPooling2dWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClPooling2dWorkload.hpp3
-rw-r--r--src/backends/cl/workloads/ClPreluWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClPreluWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClQLstmWorkload.cpp9
-rw-r--r--src/backends/cl/workloads/ClQLstmWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClQuantizeWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClQuantizeWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClReshapeWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClReshapeWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClResizeWorkload.cpp9
-rw-r--r--src/backends/cl/workloads/ClResizeWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClRsqrtWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClRsqrtWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClSliceWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClSliceWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClSoftmaxWorkload.cpp8
-rw-r--r--src/backends/cl/workloads/ClSoftmaxWorkload.hpp6
-rw-r--r--src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp7
-rw-r--r--src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClSplitterWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClSplitterWorkload.hpp5
-rw-r--r--src/backends/cl/workloads/ClStackWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClStackWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClStridedSliceWorkload.cpp6
-rw-r--r--src/backends/cl/workloads/ClStridedSliceWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClSubtractionWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClSubtractionWorkload.hpp4
-rw-r--r--src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp5
-rw-r--r--src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp3
-rw-r--r--src/backends/cl/workloads/ClTransposeWorkload.cpp7
-rw-r--r--src/backends/cl/workloads/ClTransposeWorkload.hpp4
110 files changed, 543 insertions, 230 deletions
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 41b779f64a..35186f286a 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -107,9 +107,8 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptor
void ClWorkloadFactory::InitializeCLCompileContext()
{
// Initialize our m_CLCompileContext using default device and context
- cl::Device device = cl::Device::getDefault();
- cl::Context context = cl::Context(device);
-
+ auto context = arm_compute::CLKernelLibrary::get().context();
+ auto device = arm_compute::CLKernelLibrary::get().get_device();
m_CLCompileContext = arm_compute::CLCompileContext(context, device);
if (m_ModelContextPtr)
@@ -200,64 +199,64 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClActivationWorkload>(descriptor, info);
+ return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClAdditionWorkload>(descriptor, info);
+ return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClArgMinMaxWorkload>(descriptor, info);
+ return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization(
const BatchNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info);
+ return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClComparisonWorkload>(descriptor, info);
+ return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConcatWorkload>(descriptor, info);
+ return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConstantWorkload>(descriptor, info);
+ return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
const ConvertFp16ToFp32QueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info);
+ return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info);
+ return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
@@ -278,45 +277,46 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolut
return MakeWorkload<ClConvolution2dWorkload>(descriptor,
info,
m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext,
isFastMathEnabled);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info);
+ return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info);
+ return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDequantizeWorkload>(descriptor, info);
+ return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess(
const DetectionPostProcessQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
@@ -330,22 +330,22 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const Eleme
absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
- return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info);
+ return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
}
case UnaryOperation::Exp:
- return std::make_unique<ClExpWorkload>(descriptor, info);
+ return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
case UnaryOperation::Neg:
- return std::make_unique<ClNegWorkload>(descriptor, info);
+ return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
case UnaryOperation::Rsqrt:
{
RsqrtQueueDescriptor rsqrtQueueDescriptor;
rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
- return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info);
+ return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
}
case UnaryOperation::LogicalNot:
- return std::make_unique<ClLogicalNotWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
default:
return nullptr;
}
@@ -365,25 +365,28 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescri
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClFillWorkload>(descriptor, info);
+ return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClGatherWorkload>(descriptor, info);
+ return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
@@ -407,13 +410,13 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInstanceNormalization(
const InstanceNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info);
+ return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
@@ -422,9 +425,9 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalB
switch(descriptor.m_Parameters.m_Operation)
{
case LogicalBinaryOperation::LogicalAnd:
- return std::make_unique<ClLogicalAndWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
case LogicalBinaryOperation::LogicalOr:
- return std::make_unique<ClLogicalOrWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
default:
return nullptr;
}
@@ -433,25 +436,28 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalB
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMaximumWorkload>(descriptor, info);
+ return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMeanWorkload>(descriptor, info);
+ return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
@@ -485,19 +491,19 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDesc
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMinimumWorkload>(descriptor, info);
+ return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMultiplicationWorkload>(descriptor, info);
+ return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
@@ -509,61 +515,61 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDesc
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPadWorkload>(descriptor, info);
+ return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPermuteWorkload>(descriptor, info);
+ return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPooling2dWorkload>(descriptor, info);
+ return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor,
const WorkloadInfo &info) const
{
- return MakeWorkload<ClPreluWorkload>(descriptor, info);
+ return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClQLstmWorkload>(descriptor, info);
+ return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClQuantizeWorkload>(descriptor, info);
+ return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info);
+ return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClReshapeWorkload>(descriptor, info);
+ return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClResizeWorkload>(descriptor, info);
+ return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
@@ -595,62 +601,68 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescri
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSliceWorkload>(descriptor, info);
+ return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return std::make_unique<ClSoftmaxWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info);
+ return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info);
+ return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSplitterWorkload>(descriptor, info);
+ return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClStackWorkload>(descriptor, info);
+ return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClStridedSliceWorkload>(descriptor, info);
+ return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSubtractionWorkload>(descriptor, info);
+ return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClTransposeWorkload>(descriptor, info);
+ return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d(
const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
} // namespace armnn
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 4bd3d3a33d..765409a426 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -10,6 +10,8 @@
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
#include <aclCommon/test/CreateWorkloadClNeon.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
@@ -334,6 +336,98 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dClCompiledContextWorkload)
+{
+ using namespace armnn;
+
+ const DataType inputType = DataType::QAsymmU8;
+ const DataType kernelType = DataType::QSymmS8;
+ const DataType biasType = DataType::Signed32;
+
+ TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
+ TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
+
+ const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
+ constexpr unsigned int quantDimension = 0;
+
+ TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
+
+ const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
+ TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
+
+ std::vector<uint8_t> inputData =
+ {
+ 138, 108, 138, 108, 138, 108
+ };
+
+ std::vector<int8_t> kernelData =
+ {
+ 1, 2, 1, 2, 1, 2
+ };
+
+ std::vector<int32_t> biasData =
+ {
+ 4, 4, 4
+ };
+
+ std::vector<uint8_t> expectedOutputData =
+ {
+ 121, 118, 115, 121, 118, 115, 121, 118, 115
+ };
+
+
+ Convolution2dDescriptor descriptor;
+ descriptor.m_StrideX = 1;
+ descriptor.m_StrideY = 1;
+ descriptor.m_PadLeft = 0;
+ descriptor.m_PadRight = 0;
+ descriptor.m_PadTop = 0;
+ descriptor.m_PadBottom = 0;
+ descriptor.m_BiasEnabled = true;
+ descriptor.m_DataLayout = DataLayout::NHWC;
+
+ auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
+ auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
+ auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
+
+ std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
+ std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
+
+
+ WorkloadInfo workloadInfo;
+ ScopedCpuTensorHandle weightTensor(kernelInfo);
+ ScopedCpuTensorHandle biasTensor(biasInfo);
+
+ AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+ AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+ Convolution2dQueueDescriptor queueDescriptor;
+ queueDescriptor.m_Parameters = descriptor;
+ queueDescriptor.m_Weight = &weightTensor;
+ queueDescriptor.m_Bias = &biasTensor;
+
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+ AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+ // Initialize our m_CLCompileContext using default device and context
+ auto context = arm_compute::CLKernelLibrary::get().context();
+ auto device = arm_compute::CLKernelLibrary::get().get_device();
+ auto clCompileContext = arm_compute::CLCompileContext(context, device);
+
+
+
+ // Check built programs are empty in context
+ BOOST_TEST(clCompileContext.get_built_programs().empty());
+
+ auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
+ workloadInfo,
+ clMemoryManager->GetIntraLayerManager(),
+ clCompileContext);
+ ARMNN_ASSERT(workload != nullptr);
+ // Check built programs are not empty in context
+ BOOST_TEST(!clCompileContext.get_built_programs().empty());
+}
+
template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 858ef5b46d..4682c646d1 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -24,7 +24,9 @@ arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorI
return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
}
-ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<AbsQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1);
@@ -32,7 +34,7 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const Workloa
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_AbsLayer.configure(&input, &output);
+ m_AbsLayer.configure(clCompileContext, &input, &output);
}
void ClAbsWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAbsWorkload.hpp b/src/backends/cl/workloads/ClAbsWorkload.hpp
index 763cafcfbd..d0f7d16f41 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.hpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorI
class ClAbsWorkload : public BaseWorkload<AbsQueueDescriptor>
{
public:
- ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClAbsWorkload(const AbsQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index 685652036b..8997a9720d 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -30,7 +30,8 @@ arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
}
ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1);
@@ -40,7 +41,7 @@ ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& desc
arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+ m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
}
void ClActivationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClActivationWorkload.hpp b/src/backends/cl/workloads/ClActivationWorkload.hpp
index 35166332e6..6b71e8653a 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.hpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
class ClActivationWorkload : public BaseWorkload<ActivationQueueDescriptor>
{
public:
- ClActivationWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClActivationWorkload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 7e75a04110..0ab7446026 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -19,7 +19,8 @@ using namespace armcomputetensorutils;
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<AdditionQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1);
@@ -30,7 +31,7 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClAdditionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp
index 372c4bc6f7..cd25be12e7 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.hpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp
@@ -15,7 +15,9 @@ namespace armnn
class ClAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor>
{
public:
- ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 5910080859..8974930afa 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -53,7 +53,8 @@ arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input,
ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
{
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
@@ -69,7 +70,11 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descrip
}
else
{
- m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MIN);
+ m_ArgMinMaxLayer.configure(clCompileContext,
+ &input,
+ aclAxis,
+ &output,
+ arm_compute::ReductionOperation::ARG_IDX_MIN);
}
}
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
index 54f28e6175..3ec137d49e 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClArgMinMaxWorkloadValidate(const TensorInfo& input,
class ClArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor>
{
public:
- ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index c595e20a1f..daaed17a90 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -52,7 +52,9 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
}
ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
- const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
m_Mean = std::make_unique<arm_compute::CLTensor>();
@@ -78,7 +80,8 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input,
+ m_Layer.configure(clCompileContext,
+ &input,
&output,
m_Mean.get(),
m_Variance.get(),
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index ef5778309e..c9f1f7f295 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -25,7 +25,9 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
{
public:
- ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index 1a7a8dca81..8978c5a66e 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -18,8 +18,9 @@ namespace armnn
using namespace armcomputetensorutils;
ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
- const WorkloadInfo& info)
- : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
@@ -35,7 +36,7 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, blockWidth, blockHeight, &output);
+ m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
}
void ClBatchToSpaceNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
index 881b294097..2262f33c73 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
{
public:
- ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index 30b336dd94..20e5669807 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -39,7 +39,9 @@ arm_compute::Status ClComparisonWorkloadValidate(const TensorInfo& input0,
return aclStatus;
}
-ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1);
@@ -50,7 +52,7 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc
const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(m_Data.m_Parameters);
- m_ComparisonLayer.configure(&input0, &input1, &output, comparisonOperation);
+ m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
}
void ClComparisonWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.hpp b/src/backends/cl/workloads/ClComparisonWorkload.hpp
index e842152fed..4a92e6b6dc 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.hpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClComparisonWorkloadValidate(const TensorInfo& input0,
class ClComparisonWorkload : public BaseWorkload<ComparisonQueueDescriptor>
{
public:
- ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index da0d675232..3c5f23742a 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -11,7 +11,6 @@
#include <cl/ClLayerSupport.hpp>
#include <arm_compute/core/Types.h>
-#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h>
namespace armnn
{
@@ -46,7 +45,9 @@ arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo*
return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
}
-ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
{
bool allInputsAreSubtensors = true;
@@ -83,7 +84,7 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, cons
// Configure input and output tensors
size_t aclAxis = CalcAxis(descriptor.m_Parameters);
- layer->configure(aclInputs, &output, aclAxis);
+ layer->configure(clCompileContext, aclInputs, &output, aclAxis);
// Prepare
layer->prepare();
diff --git a/src/backends/cl/workloads/ClConcatWorkload.hpp b/src/backends/cl/workloads/ClConcatWorkload.hpp
index 772bc094ea..3120b423b6 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.hpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.hpp
@@ -9,6 +9,7 @@
#include <arm_compute/core/Error.h>
#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h>
namespace armnn
{
@@ -20,7 +21,9 @@ arm_compute::Status ClConcatWorkloadValidate(const std::vector<const TensorInfo*
class ClConcatWorkload : public BaseWorkload<ConcatQueueDescriptor>
{
public:
- ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp
index d6b5c57a7e..40acb8ebd0 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.cpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.cpp
@@ -41,7 +41,9 @@ arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output)
}
}
-ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext&)
: BaseWorkload<ConstantQueueDescriptor>(descriptor, info)
, m_RanOnce(false)
{
diff --git a/src/backends/cl/workloads/ClConstantWorkload.hpp b/src/backends/cl/workloads/ClConstantWorkload.hpp
index e5a1d4410d..8fa5d632c2 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.hpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.hpp
@@ -8,6 +8,8 @@
#include <arm_compute/core/Error.h>
#include <backendsCommon/Workload.hpp>
+#include <arm_compute/core/CL/CLCompileContext.h>
+
namespace armnn
{
arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output);
@@ -15,7 +17,9 @@ arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output);
class ClConstantWorkload : public BaseWorkload<ConstantQueueDescriptor>
{
public:
- ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConstantWorkload(const ConstantQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index d2e86f8c94..aaffd83741 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -15,7 +15,9 @@ using namespace armcomputetensorutils;
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
- const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext) :
Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1);
@@ -23,7 +25,7 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
}
void ClConvertFp16ToFp32Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
index ef5c9b6497..e8e6c98014 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
@@ -16,7 +16,9 @@ class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16T
{
public:
- ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index 3f528a1532..a9f1d91bcf 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -15,7 +15,9 @@ using namespace armcomputetensorutils;
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
- const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext) :
Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
@@ -23,7 +25,7 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
}
void ClConvertFp32ToFp16Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
index 6e04e39425..17eac7d23b 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
@@ -16,7 +16,9 @@ class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32T
{
public:
- ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 50cb9ded37..99a981bd0c 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -65,6 +65,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext,
const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
@@ -97,7 +98,8 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_ConvolutionLayer.configure(&input,
+ m_ConvolutionLayer.configure(clCompileContext,
+ &input,
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 70170b569d..d0f7a5b251 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -32,6 +32,7 @@ public:
ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext,
const bool isFastMathEnabled = false);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index 43c81dc209..d42b261a10 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -37,7 +37,8 @@ arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
}
ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1);
@@ -54,7 +55,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor
PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, &output, blockSize);
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
}
void ClDepthToSpaceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
index de8b496669..6cb8bb5e9e 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
@@ -21,7 +21,9 @@ arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
{
public:
- ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 53f16848eb..655f0c9c35 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -75,7 +75,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
// Allocate a buffer for the swizzling of the weight tensor
@@ -124,6 +125,7 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+ clCompileContext,
&input,
m_KernelTensor.get(),
m_BiasTensor.get(),
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
index c75913737d..d490012cd9 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
@@ -27,7 +27,8 @@ public:
using BaseWorkload<DepthwiseConvolution2dQueueDescriptor>::m_Data;
ClDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info);
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index eb63900380..52d8fab93c 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -28,7 +28,8 @@ arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const
}
ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor,
- const WorkloadInfo& workloadInfo)
+ const WorkloadInfo& workloadInfo,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DequantizeQueueDescriptor>(descriptor, workloadInfo)
{
m_Data.ValidateInputsOutputs("ClDequantizeWorkload", 1, 1);
@@ -40,7 +41,7 @@ ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& desc
m_Data.m_Outputs[0])->GetTensor();
m_Layer.reset(new arm_compute::CLDequantizationLayer());
- m_Layer->configure(&input, &output);
+ m_Layer->configure(clCompileContext, &input, &output);
m_Layer->prepare();
}
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.hpp b/src/backends/cl/workloads/ClDequantizeWorkload.hpp
index 6e61da2ebf..628ea20f1b 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.hpp
@@ -17,7 +17,9 @@ arm_compute::Status ClDequantizeWorkloadValidate(const TensorInfo& input, const
class ClDequantizeWorkload : public BaseWorkload<DequantizeQueueDescriptor>
{
public:
- ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& workloadInfo);
+ ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& workloadInfo,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
index c79e55ebdd..3df8d52f6d 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
@@ -32,7 +32,8 @@ arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<DivisionQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1);
@@ -43,7 +44,7 @@ ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor&
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_ArithmeticDivision.configure(&input0, &input1, &output, activationInfo);
+ m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo);
}
void ClDivisionFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
index 71d27ed5b5..481b8b0a89 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
@@ -20,8 +20,9 @@ arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
{
public:
- ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const
- WorkloadInfo& info);
+ ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index abf4181286..60c383f8bf 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -23,7 +23,9 @@ arm_compute::Status ClExpWorkloadValidate(const TensorInfo& input, const TensorI
return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
}
-ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1);
@@ -31,7 +33,7 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ExpLayer.configure(&input, &output);
+ m_ExpLayer.configure(clCompileContext, &input, &output);
}
void ClExpWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClExpWorkload.hpp b/src/backends/cl/workloads/ClExpWorkload.hpp
index c35aebbeb9..407fb5ed1d 100644
--- a/src/backends/cl/workloads/ClExpWorkload.hpp
+++ b/src/backends/cl/workloads/ClExpWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClExpWorkloadValidate(const TensorInfo& input, const TensorI
class ClExpWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index 47a70bc677..a2204fa42d 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -15,7 +15,9 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<FillQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1);
@@ -23,7 +25,7 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, const Work
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
arm_compute::PixelValue pixelValue = GetPixelValue(output, descriptor.m_Parameters.m_Value);
- m_Layer.configure(&output, pixelValue);
+ m_Layer.configure(clCompileContext, &output, pixelValue);
}
void ClFillWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFillWorkload.hpp b/src/backends/cl/workloads/ClFillWorkload.hpp
index 8919445d0c..8539501f17 100644
--- a/src/backends/cl/workloads/ClFillWorkload.hpp
+++ b/src/backends/cl/workloads/ClFillWorkload.hpp
@@ -14,7 +14,9 @@ namespace armnn {
class ClFillWorkload : public BaseWorkload<FillQueueDescriptor>
{
public:
- ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClFillWorkload(const FillQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index f38342ed39..3915270c24 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -20,7 +20,9 @@ arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input,
return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
}
-ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<FloorQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1);
@@ -28,7 +30,7 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClFloorFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
index 1ddaddff0b..1c755c5b57 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClFloorWorkloadValidate(const TensorInfo& input,
class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor>
{
public:
- ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index eaec639f28..9135d27376 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -45,8 +45,11 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
fullyConnectedLayerInfo);
}
-ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClFullyConnectedWorkload::ClFullyConnectedWorkload(
+ const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
, m_FullyConnectedLayer(memoryManager)
{
@@ -69,7 +72,12 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDesc
arm_compute::FullyConnectedLayerInfo fc_info =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
- m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+ m_FullyConnectedLayer.configure(clCompileContext,
+ &input,
+ m_WeightsTensor.get(),
+ m_BiasesTensor.get(),
+ &output,
+ fc_info);
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index 311b59498b..45394da97f 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -27,7 +27,8 @@ class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnecte
public:
ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor,
const armnn::WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index c76b9c7a17..98dfe7bc81 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -27,7 +27,8 @@ arm_compute::Status ClGatherWorkloadValidate(const TensorInfo& input,
}
ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<GatherQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1);
@@ -38,7 +39,7 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
int aclAxis = ComputeAclAxis(descriptor.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_Layer.configure(&input, &indices, &output, aclAxis);
+ m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis);
};
void ClGatherWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClGatherWorkload.hpp b/src/backends/cl/workloads/ClGatherWorkload.hpp
index df71a99fa0..8199aaf338 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.hpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClGatherWorkloadValidate(const TensorInfo& input,
class ClGatherWorkload : public BaseWorkload<GatherQueueDescriptor>
{
public:
- ClGatherWorkload(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClGatherWorkload(const GatherQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index 50cf345a7f..a0e8e7b87d 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -31,7 +31,8 @@ arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& in
ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
const InstanceNormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1);
@@ -43,7 +44,8 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input,
+ m_Layer.configure(clCompileContext,
+ &input,
&output,
descriptor.m_Parameters.m_Gamma,
descriptor.m_Parameters.m_Beta,
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
index 0e37bdcc9b..957ba34685 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClInstanceNormalizationWorkloadValidate(const TensorInfo& in
class ClInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizationQueueDescriptor>
{
public:
- ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index e1a61277d5..bd38219a3e 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -27,7 +27,8 @@ arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
}
ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
@@ -41,7 +42,7 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza
int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
- m_Layer.configure(&input, &output, axis, m_Data.m_Parameters.m_Eps);
+ m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps);
}
void ClL2NormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
index 26aea9fd88..8648da4492 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
{
public:
- ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 6d53523291..1a255f13f6 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -25,8 +25,10 @@ arm_compute::Status ClLogSoftmaxWorkloadValidate(const TensorInfo& input,
return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
}
-ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
, m_LogSoftmaxLayer(memoryManager)
{
@@ -36,7 +38,7 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_LogSoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
}
void ClLogSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
index 9b531add19..a2835887e0 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
@@ -25,7 +25,8 @@ class ClLogSoftmaxWorkload : public BaseWorkload<LogSoftmaxQueueDescriptor>
{
public:
ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index 9418d73c23..f04cede2f8 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -32,7 +32,8 @@ arm_compute::Status ClLogicalAndWorkloadValidate(const TensorInfo& input0,
}
ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1);
@@ -41,7 +42,7 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalAndLayer.configure(&input0, &input1, &output);
+ m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClLogicalAndWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
index 3bf6afe9d4..c7cbf5a7cc 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClLogicalAndWorkloadValidate(const TensorInfo& input0,
class ClLogicalAndWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor>
{
public:
- ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index eb90cafd1c..475e57f8dc 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -29,7 +29,8 @@ arm_compute::Status ClLogicalNotWorkloadValidate(const TensorInfo& input,
}
ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1);
@@ -37,7 +38,7 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalNotLayer.configure(&input, &output);
+ m_LogicalNotLayer.configure(clCompileContext, &input, &output);
}
void ClLogicalNotWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
index f1225c7ba7..9c6c3d15a6 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClLogicalNotWorkloadValidate(const TensorInfo& input, const
class ClLogicalNotWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index e9895bfc39..355310ef5a 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -32,7 +32,8 @@ arm_compute::Status ClLogicalOrWorkloadValidate(const TensorInfo& input0,
}
ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1);
@@ -41,7 +42,7 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalOrLayer.configure(&input0, &input1, &output);
+ m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClLogicalOrWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
index 8faabde90a..a6db9908bb 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClLogicalOrWorkloadValidate(const TensorInfo& input0,
class ClLogicalOrWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor>
{
public:
- ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index fe9b45e054..58cc735704 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -19,7 +19,9 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
+ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
+ const WorkloadInfo &info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<LstmQueueDescriptor>(descriptor, info)
{
arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
@@ -185,11 +187,12 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
throw armnn::Exception("Wrong Type of Activation Function!");
}
- m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
- m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
- m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
- m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
- &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+ m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(),
+ m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
+ m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(),
+ m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(),
+ m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in,
+ &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
&cell_state_out, &output, lstm_param, activationLayerInfo,
cell_threshold, projection_threshold);
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
index b7cb408b40..b0847503d9 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
@@ -18,7 +18,9 @@ namespace armnn
class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor>
{
public:
- ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index cd3192d186..85bea47f21 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -37,7 +37,8 @@ arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0,
}
ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MaximumQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMaximumWorkload", 2, 1);
@@ -46,7 +47,7 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MaximumLayer.configure(&input0, &input1, &output);
+ m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClMaximumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.hpp b/src/backends/cl/workloads/ClMaximumWorkload.hpp
index 18f67cddf6..f6681c79a3 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.hpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClMaximumWorkloadValidate(const TensorInfo& input0,
class ClMaximumWorkload : public BaseWorkload<MaximumQueueDescriptor>
{
public:
- ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 470b6a883d..4cc0f7c1c2 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -28,7 +28,9 @@ arm_compute::Status ClMeanValidate(const TensorInfo& input,
return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
}
-ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MeanQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
@@ -40,7 +42,7 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const Work
info.m_InputTensorInfos[0].GetNumDimensions(),
m_Data.m_Parameters.m_Axis);
- m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+ m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output);
}
void ClMeanWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
index 127c0548b1..04e9fe23f2 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.hpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClMeanValidate(const TensorInfo& input,
class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
public:
- ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMeanWorkload(const MeanQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 5f8dfdb8eb..07a78b5008 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -37,7 +37,8 @@ arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0,
}
ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MinimumQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMinimumWorkload", 2, 1);
@@ -46,7 +47,7 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MinimumLayer.configure(&input0, &input1, &output);
+ m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClMinimumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.hpp b/src/backends/cl/workloads/ClMinimumWorkload.hpp
index 55d7eea9dd..34e7bb8d14 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.hpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClMinimumWorkloadValidate(const TensorInfo& input0,
class ClMinimumWorkload : public BaseWorkload<MinimumQueueDescriptor>
{
public:
- ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index 46a1c4bc59..31e9d022cc 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -45,7 +45,8 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MultiplicationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMultiplicationWorkload", 2, 1);
@@ -62,7 +63,8 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
// Construct
- m_PixelWiseMultiplication.configure(&input0,
+ m_PixelWiseMultiplication.configure(clCompileContext,
+ &input0,
&input1,
&output,
1.0f,
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
index 461449cc35..424f3d7045 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
class ClMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
{
public:
- ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using BaseWorkload<MultiplicationQueueDescriptor>::BaseWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index 27886624b0..7505ab608a 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -23,7 +23,9 @@ arm_compute::Status ClNegWorkloadValidate(const TensorInfo& input, const TensorI
return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
}
-ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClNegWorkload", 1, 1);
@@ -31,7 +33,7 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_NegLayer.configure(&input, &output);
+ m_NegLayer.configure(clCompileContext, &input, &output);
}
void ClNegWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNegWorkload.hpp b/src/backends/cl/workloads/ClNegWorkload.hpp
index 9dbfa07665..7ee35cb87a 100644
--- a/src/backends/cl/workloads/ClNegWorkload.hpp
+++ b/src/backends/cl/workloads/ClNegWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClNegWorkloadValidate(const TensorInfo& input, const TensorI
class ClNegWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index 5f2fd7ab83..290d29ae52 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -29,7 +29,8 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
}
ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
@@ -43,7 +44,7 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu
arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
- m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+ m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo);
};
void ClNormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
index a6d4f25e5e..dd309b44c2 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor>
{
public:
- ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index 8a8c34a212..533855c295 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -15,7 +15,9 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PadQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
@@ -33,7 +35,7 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const Workloa
arm_compute::PixelValue pixelValue = GetPixelValue(input, descriptor.m_Parameters.m_PadValue);
- m_Layer.configure(&input, &output, padList, pixelValue);
+ m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue);
}
void ClPadWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp
index e87cbb71c2..ac9b4b7c65 100644
--- a/src/backends/cl/workloads/ClPadWorkload.hpp
+++ b/src/backends/cl/workloads/ClPadWorkload.hpp
@@ -14,7 +14,9 @@ namespace armnn {
class ClPadWorkload : public BaseWorkload<PadQueueDescriptor>
{
public:
- ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPadWorkload(const PadQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
index 41bce1d4fa..5aadc7629e 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.cpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -27,7 +27,8 @@ arm_compute::Status ClPermuteWorkloadValidate(const TensorInfo& input,
}
ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
{
using armcomputetensorutils::BuildArmComputePermutationVector;
@@ -39,7 +40,7 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
// Run the layer.
- m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+ m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings));
}
void ClPermuteWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp
index 8b5f4c6147..2df2b26a11 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.hpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp
@@ -29,7 +29,9 @@ public:
return name;
}
- ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
index 683880439f..c7cc10218a 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
@@ -28,7 +28,9 @@ arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
}
ClPooling2dWorkload::ClPooling2dWorkload(
- const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1);
@@ -48,7 +50,7 @@ ClPooling2dWorkload::ClPooling2dWorkload(
arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision);
// Run the layer.
- m_PoolingLayer.configure(&input, &output, layerInfo);
+ m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
}
void ClPooling2dWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.hpp b/src/backends/cl/workloads/ClPooling2dWorkload.hpp
index ce67db2a13..f4117aca2e 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.hpp
@@ -22,7 +22,8 @@ public:
using BaseWorkload<Pooling2dQueueDescriptor>::m_Data;
ClPooling2dWorkload(const Pooling2dQueueDescriptor& descriptor,
- const WorkloadInfo& info);
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index 1813105c21..73fa887532 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -27,7 +27,8 @@ arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input,
}
ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PreluQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClPreluWorkload", 1, 1);
@@ -36,7 +37,7 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
arm_compute::ICLTensor& alpha = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_PreluLayer.configure(&input, &alpha, &output);
+ m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
}
void ClPreluWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPreluWorkload.hpp b/src/backends/cl/workloads/ClPreluWorkload.hpp
index 9061416431..ac8932c9d1 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.hpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClPreluWorkloadValidate(const TensorInfo& input,
class ClPreluWorkload : public BaseWorkload<PreluQueueDescriptor>
{
public:
- ClPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPreluWorkload(const PreluQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index 7ece05f5ff..0ae371575b 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -14,7 +14,9 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
{
arm_compute::LSTMParams<arm_compute::ICLTensor> qLstmParams;
@@ -150,8 +152,9 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const W
m_Data.m_Parameters.m_CellIntermediateScale,
m_Data.m_Parameters.m_OutputIntermediateScale);
- // QLSTM NEON configure
- m_QLstmLayer.configure(&input,
+ // QLSTM CL configure
+ m_QLstmLayer.configure(clCompileContext,
+ &input,
m_InputToForgetWeightsTensor.get(),
m_InputToCellWeightsTensor.get(),
m_InputToOutputWeightsTensor.get(),
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.hpp b/src/backends/cl/workloads/ClQLstmWorkload.hpp
index f98c9b3f9a..6758abcde9 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.hpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.hpp
@@ -19,7 +19,9 @@ namespace armnn
class ClQLstmWorkload : public BaseWorkload<QLstmQueueDescriptor>
{
public:
- ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index 263065a5a4..5c945e0921 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -29,7 +29,9 @@ arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input,
&aclOutputInfo);
}
-ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<QuantizeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClQuantizeWorkload", 1, 1);
@@ -37,7 +39,7 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClQuantizeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.hpp b/src/backends/cl/workloads/ClQuantizeWorkload.hpp
index f4a7ec64dd..9bb1572c5c 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClQuantizeWorkloadValidate(const TensorInfo& input,
class ClQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor>
{
public:
- ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index 688ebf9184..636bdecbeb 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -62,7 +62,8 @@ arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, con
}
ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor &descriptor,
- const WorkloadInfo &info):
+ const WorkloadInfo &info,
+ const arm_compute::CLCompileContext& clCompileContext):
BaseWorkload<QuantizedLstmQueueDescriptor>(descriptor, info)
{
m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
@@ -108,7 +109,8 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip
arm_compute::ICLTensor& cellStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
- m_QuantizedLstmLayer.configure(&inputTensor, m_InputToInputWeightsTensor.get(), m_InputToForgetWeightsTensor.get(),
+ m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
+ m_InputToForgetWeightsTensor.get(),
m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
index 580db490d6..6561850d79 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
@@ -22,7 +22,9 @@ arm_compute::Status ClQuantizedLstmWorkloadValidate(const TensorInfo& input, con
class ClQuantizedLstmWorkload : public BaseWorkload<QuantizedLstmQueueDescriptor>
{
public:
- ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index d752290444..0988babf23 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -21,7 +21,9 @@ arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input,
return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
}
-ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ReshapeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClReshapeWorkload", 1, 1);
@@ -29,7 +31,7 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, c
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClReshapeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.hpp b/src/backends/cl/workloads/ClReshapeWorkload.hpp
index d836f1e43a..70d72879f5 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.hpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClReshapeWorkloadValidate(const TensorInfo& input,
class ClReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor>
{
public:
- ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 744a915616..e47740624e 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -46,8 +46,10 @@ arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input,
descriptor.m_AlignCorners));
}
-ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) :
- BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
+ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1);
@@ -65,7 +67,8 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, cons
? arm_compute::SamplingPolicy::CENTER
: arm_compute::SamplingPolicy::TOP_LEFT;
- m_ResizeLayer.configure(&input,
+ m_ResizeLayer.configure(clCompileContext,
+ &input,
&output,
arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
arm_compute::BorderMode::REPLICATE,
diff --git a/src/backends/cl/workloads/ClResizeWorkload.hpp b/src/backends/cl/workloads/ClResizeWorkload.hpp
index ab5b943457..9549a32165 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClResizeWorkloadValidate(const TensorInfo& input,
class ClResizeWorkload : public BaseWorkload<ResizeQueueDescriptor>
{
public:
- ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index 48fd1e0fd0..a3a04c11eb 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -23,7 +23,9 @@ arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const Tenso
return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
}
-ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<RsqrtQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1);
@@ -31,7 +33,7 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const W
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_RsqrtLayer.configure(&input, &output);
+ m_RsqrtLayer.configure(clCompileContext, &input, &output);
}
void ClRsqrtWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.hpp b/src/backends/cl/workloads/ClRsqrtWorkload.hpp
index 8fb6229160..35f84146bb 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.hpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClRsqrtWorkloadValidate(const TensorInfo& input, const Tenso
class ClRsqrtWorkload : public BaseWorkload<RsqrtQueueDescriptor>
{
public:
- ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index d7b1dbbe48..16271961f9 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -30,7 +30,9 @@ arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input,
return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
}
-ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SliceQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
@@ -43,7 +45,7 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const W
std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size);
- m_SliceFunction.configure(&input, &output, starts, ends);
+ m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
}
void ClSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSliceWorkload.hpp b/src/backends/cl/workloads/ClSliceWorkload.hpp
index 3460b7788b..67836c2b5c 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.hpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.hpp
@@ -20,7 +20,9 @@ arm_compute::Status ClSliceWorkloadValidate(const TensorInfo& input,
class ClSliceWorkload : public BaseWorkload<SliceQueueDescriptor>
{
public:
- ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSliceWorkload(const SliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 8bc2a765ed..4547c682c9 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -25,8 +25,10 @@ arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
}
-ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
, m_SoftmaxLayer(memoryManager)
{
@@ -36,7 +38,7 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, c
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
}
void ClSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
index 158bf46c32..1742c60511 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
@@ -23,8 +23,10 @@ arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
class ClSoftmaxWorkload : public BaseWorkload<SoftmaxQueueDescriptor>
{
public:
- ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 443c56b7b5..7b29cded0f 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -45,7 +45,9 @@ arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input,
}
ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
- const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const SpaceToBatchNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
@@ -68,7 +70,8 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_SpaceToBatchLayer.configure(&input,
+ m_SpaceToBatchLayer.configure(clCompileContext,
+ &input,
blockWidth,
blockHeight,
paddingLeftTop,
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
index 7500b5a3ac..06d243a6d1 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
@@ -22,7 +22,9 @@ arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input,
class ClSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescriptor>
{
public:
- ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index f35fe0e3c9..7a590d26b6 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -18,7 +18,8 @@ namespace armnn
using namespace armcomputetensorutils;
ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1);
@@ -33,7 +34,7 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, &output, blockSize);
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
}
void ClSpaceToDepthWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
index 57ce5d4d05..b782bbe24d 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
{
public:
- ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index 045fbb7595..70a817825e 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -9,7 +9,6 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
-#include <arm_compute/runtime/CL/functions/CLSplit.h>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <cl/ClTensorHandle.hpp>
@@ -53,7 +52,9 @@ arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input,
return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
}
-ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext&)
: BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
{
bool allOutputsAreSubtensors = true;
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.hpp b/src/backends/cl/workloads/ClSplitterWorkload.hpp
index 82211f5baf..c59aa023bf 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.hpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.hpp
@@ -9,6 +9,7 @@
#include <arm_compute/core/Error.h>
#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/functions/CLSplit.h>
#include <functional>
@@ -22,7 +23,9 @@ arm_compute::Status ClSplitterWorkloadValidate(const TensorInfo& input,
class ClSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor>
{
public:
- ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSplitterWorkload(const SplitterQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index c0b88b1193..749282f53a 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -44,7 +44,9 @@ arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*>
return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
}
-ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StackQueueDescriptor>(descriptor, info)
{
std::vector<arm_compute::ICLTensor*> aclInputs;
@@ -58,7 +60,7 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const W
m_Layer.reset(new arm_compute::CLStackLayer());
int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions());
- m_Layer->configure(aclInputs, aclAxis, &output);
+ m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
}
void ClStackWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClStackWorkload.hpp b/src/backends/cl/workloads/ClStackWorkload.hpp
index f27d6cdad0..3f1e642f60 100644
--- a/src/backends/cl/workloads/ClStackWorkload.hpp
+++ b/src/backends/cl/workloads/ClStackWorkload.hpp
@@ -18,7 +18,9 @@ arm_compute::Status ClStackWorkloadValidate(const std::vector<const TensorInfo*>
class ClStackWorkload : public BaseWorkload<StackQueueDescriptor>
{
public:
- ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClStackWorkload(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index b094a910f4..92e860fc42 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -53,7 +53,8 @@ arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input,
}
ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1);
@@ -78,7 +79,8 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_StridedSliceLayer.configure(&input,
+ m_StridedSliceLayer.configure(clCompileContext,
+ &input,
&output,
starts,
ends,
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
index bce3fe13eb..1229599847 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
@@ -22,7 +22,9 @@ arm_compute::Status ClStridedSliceWorkloadValidate(const TensorInfo& input,
class ClStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor>
{
public:
- ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index 865dceb869..31e0becfd8 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -19,7 +19,8 @@ using namespace armcomputetensorutils;
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SubtractionQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1);
@@ -30,7 +31,7 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClSubtractionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
index 9f51de645b..28440b0938 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.hpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
@@ -15,7 +15,9 @@ namespace armnn
class ClSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor>
{
public:
- ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index 20b2104c62..ff0fd5c168 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -56,7 +56,8 @@ arm_compute::Status ClTransposeConvolution2dWorkloadValidate(const TensorInfo& i
ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) :
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext) :
BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info),
m_Layer(memoryManager)
{
@@ -82,7 +83,7 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
output.info()->set_data_layout(aclDataLayout);
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
- m_Layer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
+ m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
if (m_BiasesTensor)
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
index b7320bf6ce..8a24e6d391 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
@@ -29,7 +29,8 @@ class ClTransposeConvolution2dWorkload : public BaseWorkload<TransposeConvolutio
public:
ClTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index b276b229f6..7ef502eb8d 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -27,7 +27,8 @@ arm_compute::Status ClTransposeWorkloadValidate(const TensorInfo& input,
}
ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -36,7 +37,9 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descrip
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
// Run the layer.
- m_PermuteFunction.configure(&input, &output,
+ m_PermuteFunction.configure(clCompileContext,
+ &input,
+ &output,
armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
}
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.hpp b/src/backends/cl/workloads/ClTransposeWorkload.hpp
index c1bed93b97..4677bdc3f5 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.hpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.hpp
@@ -29,7 +29,9 @@ public:
return name;
}
- ClTransposeWorkload(const TransposeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClTransposeWorkload(const TransposeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private: