aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2020-10-21 15:58:54 +0100
committerSang-Hoon Park <sang-hoon.park@arm.com>2020-11-07 08:07:22 +0000
commitbef7fa27b0d231a8649952f60808132d109b6345 (patch)
tree7543c66a473d90e28b4860986fad77afa5115043 /arm_compute/runtime
parentb9531540dadce8331a703c32456f3c9defdfefa9 (diff)
downloadComputeLibrary-bef7fa27b0d231a8649952f60808132d109b6345.tar.gz
COMPMID-3639: (3RDPARTY_UPDATE) Move CL kernels to src
Change-Id: I10d27db788e5086adae1841e3e2441cd9b76ef84 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4310 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r--arm_compute/runtime/CL/ICLOperator.h4
-rw-r--r--arm_compute/runtime/CL/ICLSimpleFunction.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLAbsoluteDifference.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLAccumulate.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h26
-rw-r--r--arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h20
-rw-r--r--arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h20
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseAnd.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseNot.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseOr.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBitwiseXor.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLBox3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLCannyEdge.h37
-rw-r--r--arm_compute/runtime/CL/functions/CLCast.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelCombine.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelExtract.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLColorConvert.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLComparison.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLComputeAllAnchors.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLConcatenateLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolution.h33
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolutionLayer.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLCopy.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLCropResize.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthConvertLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h59
-rw-r--r--arm_compute/runtime/CL/functions/CLDequantizationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDerivative.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLDilate.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h18
-rw-r--r--arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLElementwiseOperations.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLEqualizeHistogram.h29
-rw-r--r--arm_compute/runtime/CL/functions/CLErode.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT1D.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT2D.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLFastCorners.h29
-rw-r--r--arm_compute/runtime/CL/functions/CLFill.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFillBorder.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFlattenLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLFloor.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMM.h102
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h25
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h22
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h35
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h7
-rw-r--r--arm_compute/runtime/CL/functions/CLGather.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian5x5.h26
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussianPyramid.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h39
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDescriptor.h23
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDetector.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGGradient.h16
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGMultiDetection.h36
-rw-r--r--arm_compute/runtime/CL/functions/CLHarrisCorners.h40
-rw-r--r--arm_compute/runtime/CL/functions/CLHistogram.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLIntegralImage.h16
-rw-r--r--arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h22
-rw-r--r--arm_compute/runtime/CL/functions/CLLSTMLayer.h184
-rw-r--r--arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLMagnitude.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h21
-rw-r--r--arm_compute/runtime/CL/functions/CLMeanStdDev.h39
-rw-r--r--arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLMedian3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLMinMaxLocation.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLNonLinearFilter.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizationLayer.h27
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLOpticalFlow.h41
-rw-r--r--arm_compute/runtime/CL/functions/CLPReluLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLPadLayer.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLPermute.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLPhase.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLPriorBoxLayer.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLQLSTMLayer.h167
-rw-r--r--arm_compute/runtime/CL/functions/CLQuantizationLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLRNNLayer.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLROIAlignLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLROIPoolingLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLRange.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReductionOperation.h33
-rw-r--r--arm_compute/runtime/CL/functions/CLRemap.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLReorgLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReshapeLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLReverse.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLScale.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLScharr3x3.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLSelect.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLSlice.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel3x3.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel5x5.h24
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel7x7.h24
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h37
-rw-r--r--arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLStackLayer.h21
-rw-r--r--arm_compute/runtime/CL/functions/CLTableLookup.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLThreshold.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLTile.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLTranspose.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLUpsampleLayer.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpAffine.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpPerspective.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h28
-rw-r--r--arm_compute/runtime/CL/functions/CLWinogradInputTransform.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLYOLOLayer.h5
-rw-r--r--arm_compute/runtime/IOperator.h3
121 files changed, 1118 insertions, 670 deletions
diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h
index 526b7e93e9..38bcaf32f2 100644
--- a/arm_compute/runtime/CL/ICLOperator.h
+++ b/arm_compute/runtime/CL/ICLOperator.h
@@ -24,7 +24,8 @@
#ifndef ARM_COMPUTE_ICLOPERATOR_H
#define ARM_COMPUTE_ICLOPERATOR_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
@@ -33,6 +34,7 @@
namespace arm_compute
{
+class ICLKernel;
namespace experimental
{
/** Basic interface for functions which have a single async CL kernel */
diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h
index 4b1d5b1485..310bf770c4 100644
--- a/arm_compute/runtime/CL/ICLSimpleFunction.h
+++ b/arm_compute/runtime/CL/ICLSimpleFunction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_ICLSIMPLEFUNCTION_H
#define ARM_COMPUTE_ICLSIMPLEFUNCTION_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
@@ -34,6 +32,8 @@ namespace arm_compute
{
// Forward declarations
class CLRuntimeContext;
+class CLFillBorderKernel;
+class ICLKernel;
/** Basic interface for functions which have a single OpenCL kernel */
class ICLSimpleFunction : public IFunction
@@ -53,14 +53,16 @@ public:
ICLSimpleFunction &operator=(const ICLSimpleFunction &) = delete;
/** Default move assignment operator */
ICLSimpleFunction &operator=(ICLSimpleFunction &&) = default;
+ /** Default destructor */
+ ~ICLSimpleFunction();
// Inherited methods overridden:
void run() override final;
protected:
- std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
- CLRuntimeContext *_ctx; /**< Context to use */
+ std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
+ CLRuntimeContext *_ctx; /**< Context to use */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLSIMPLEFUNCTION_H */
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
index b0f1948beb..f2831e2a99 100644
--- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
+++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAbsoluteDifferenceKernel
diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h
index 9dbf13b873..20d3476d2e 100644
--- a/arm_compute/runtime/CL/functions/CLAccumulate.h
+++ b/arm_compute/runtime/CL/functions/CLAccumulate.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAccumulateKernel */
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 632487c78d..dc2cb62b71 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLActivationLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index dc0c37e860..c254284cd7 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLARGMINMAXLAYER_H
#define ARM_COMPUTE_CLARGMINMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
@@ -36,6 +35,7 @@ namespace arm_compute
{
class ITensorInfo;
class ICLTensor;
+class CLArgMinMaxLayerKernel;
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
@@ -53,6 +53,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer &operator=(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLArgMinMaxLayer(CLArgMinMaxLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLArgMinMaxLayer &operator=(CLArgMinMaxLayer &&) = delete;
+ /** Default destructor */
+ ~CLArgMinMaxLayer();
/** Set the input and output tensors.
*
* @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
@@ -85,13 +95,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- CLTensor _not_reshaped_output;
- std::vector<CLArgMinMaxLayerKernel> _reduction_kernels_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<std::unique_ptr<CLArgMinMaxLayerKernel>> _reduction_kernels_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLARGMINMAXLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index c22991da7c..c8acf9fc6b 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -26,12 +26,16 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class CLBatchNormalizationLayerKernel;
/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer.
*
@@ -44,6 +48,16 @@ class CLBatchNormalizationLayer : public IFunction
public:
/** Default constructor */
CLBatchNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer &operator=(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLBatchNormalizationLayer(CLBatchNormalizationLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLBatchNormalizationLayer &operator=(CLBatchNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchNormalizationLayer();
/** Set the input and output tensors.
*
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
@@ -104,7 +118,7 @@ public:
void run() override;
private:
- CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+ std::unique_ptr<CLBatchNormalizationLayerKernel> _norm_kernel; /**< BatchNormalization layer kernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
index ba57921cc2..bdb58531d0 100644
--- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
@@ -26,11 +26,15 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ITensorInfo;
+class CLBatchToSpaceLayerKernel;
class ICLTensor;
/** Basic function to run @ref CLBatchToSpaceLayerKernel. */
@@ -39,6 +43,16 @@ class CLBatchToSpaceLayer : public IFunction
public:
/** Default constructor */
CLBatchToSpaceLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer &operator=(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLBatchToSpaceLayer(CLBatchToSpaceLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLBatchToSpaceLayer &operator=(CLBatchToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchToSpaceLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -95,7 +109,7 @@ public:
void run() override;
private:
- CLBatchToSpaceLayerKernel _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
+ std::unique_ptr<CLBatchToSpaceLayerKernel> _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
index 3c28938807..bf5993f4b0 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseAndKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
index 4c21d5647f..1d8531a176 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseNotKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
index 8a481737e3..7876cbf196 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseOrKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
index 6928e59d38..4f054062cd 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseXorKernel.
diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
index 5e4e89071b..d6409106da 100644
--- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
+++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
@@ -24,12 +24,16 @@
#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
#define ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class BoundingBoxTransformInfo;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLBoundingBoxTransformKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h
index 2d2aa4705c..cff780614c 100644
--- a/arm_compute/runtime/CL/functions/CLBox3x3.h
+++ b/arm_compute/runtime/CL/functions/CLBox3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
index f9d9f8f66a..9e41c31728 100644
--- a/arm_compute/runtime/CL/functions/CLCannyEdge.h
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h
@@ -26,8 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -36,6 +34,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGradientKernel;
+class CLEdgeNonMaxSuppressionKernel;
+class CLEdgeTraceKernel;
class ICLTensor;
/** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions:
@@ -56,6 +59,8 @@ public:
CLCannyEdge(const CLCannyEdge &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLCannyEdge &operator=(const CLCannyEdge &) = delete;
+ /** Default destructor */
+ ~CLCannyEdge();
/** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
@@ -88,20 +93,20 @@ public:
virtual void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
- CLGradientKernel _gradient; /**< Gradient kernel. */
- CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */
- CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */
- CLImage _gx; /**< Source tensor - Gx component. */
- CLImage _gy; /**< Source tensor - Gy component. */
- CLImage _mag; /**< Source tensor - Magnitude. */
- CLImage _phase; /**< Source tensor - Phase. */
- CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
- CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
- ICLTensor *_output; /**< Output tensor provided by the user. */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
+ std::unique_ptr<CLGradientKernel> _gradient; /**< Gradient kernel. */
+ std::unique_ptr<CLFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
+ std::unique_ptr<CLEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel. */
+ std::unique_ptr<CLEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel. */
+ CLImage _gx; /**< Source tensor - Gx component. */
+ CLImage _gy; /**< Source tensor - Gy component. */
+ CLImage _mag; /**< Source tensor - Magnitude. */
+ CLImage _phase; /**< Source tensor - Phase. */
+ CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
+ CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
+ ICLTensor *_output; /**< Output tensor provided by the user. */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCANNYEDGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
index 592368d135..bd333d4e72 100644
--- a/arm_compute/runtime/CL/functions/CLCast.h
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLCast : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h
index 4e3d10cc10..5927662fc2 100644
--- a/arm_compute/runtime/CL/functions/CLChannelCombine.h
+++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h
index cf042b4519..9ce9bcdd8a 100644
--- a/arm_compute/runtime/CL/functions/CLChannelExtract.h
+++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
index e0bb3d01c9..54cf59f59a 100644
--- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
#define ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLChannelShuffleLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
index e4017c2686..47bcabfb63 100644
--- a/arm_compute/runtime/CL/functions/CLColorConvert.h
+++ b/arm_compute/runtime/CL/functions/CLColorConvert.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index c6d61e45f2..8cc3e96ec5 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -30,7 +30,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLComparisonKernel */
class CLComparison : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
index a2f1a4eb66..d6a2ab423d 100644
--- a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
+++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
@@ -24,12 +24,15 @@
#ifndef ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
#define ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class ComputeAnchorsInfo;
/** Basic function to run @ref CLComputeAllAnchorsKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index f535c8ea97..5e7003a112 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -27,7 +27,6 @@
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
#include <memory>
@@ -36,7 +35,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ICLKernel;
class ITensorInfo;
class Status;
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 9298be2e53..75a3d3213e 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -24,14 +24,17 @@
#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/ITransformWeights.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLConvertFullyConnectedWeightsKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLConvertFullyConnectedWeightsKernel. */
class CLConvertFullyConnectedWeights : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
index c06ad0d969..4a1631a702 100644
--- a/arm_compute/runtime/CL/functions/CLConvolution.h
+++ b/arm_compute/runtime/CL/functions/CLConvolution.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLCONVOLUTION_H
#define ARM_COMPUTE_CLCONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
@@ -38,6 +36,13 @@
namespace arm_compute
{
+template <unsigned int matrix_size>
+class CLConvolutionKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionHorKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel;
+class CLFillBorderKernel;
class ICLTensor;
/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels:
@@ -85,6 +90,16 @@ class CLConvolutionSquare : public IFunction
public:
/** Default constructor */
CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare(const CLConvolutionSquare &) = delete;
+ /** Default move constructor */
+ CLConvolutionSquare(CLConvolutionSquare &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare &operator=(const CLConvolutionSquare &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionSquare &operator=(CLConvolutionSquare &&) = default;
+ /** Default destructor */
+ ~CLConvolutionSquare();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -111,13 +126,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- CLConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/
- CLFillBorderKernel _border_handler; /**< kernel for border handling */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
+ bool _is_separable; /**< true if the convolution can be separated */
+ std::unique_ptr<CLSeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
+ std::unique_ptr<CLSeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
+ std::unique_ptr<CLConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< kernel for border handling */
};
/** Basic function to run 5x5 convolution. */
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index ac36523682..d1de721193 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -73,6 +73,16 @@ class CLConvolutionLayer : public IFunction
public:
/** Default constructor */
CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLConvolutionLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer(const CLConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLConvolutionLayer(CLConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer &operator=(const CLConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index c20d75eea8..f1a091df84 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLCopy : public ICLSimpleFunction
{
diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h
index e940928b90..e781cfe61f 100644
--- a/arm_compute/runtime/CL/functions/CLCropResize.h
+++ b/arm_compute/runtime/CL/functions/CLCropResize.h
@@ -25,9 +25,7 @@
#define ARM_COMPUTE_CL_CROP_RESIZE_H
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
@@ -37,7 +35,11 @@
namespace arm_compute
{
// Forward Declarations
+class CLCompileContext;
+class CLCopyKernel;
+class CLCropKernel;
class ITensor;
+class ITensorInfo;
/** Function to perform cropping and resizing */
class CLCropResize : public IFunction
@@ -54,7 +56,7 @@ public:
/** Allow instances of this class to be moved */
CLCropResize &operator=(CLCropResize &&) = default;
/** Default destructor */
- virtual ~CLCropResize() = default;
+ ~CLCropResize();
/** Configure kernel
*
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index 19a44f7b93..3ebc858d32 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -24,17 +24,20 @@
#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
+class CLDeconvolutionLayerUpsampleKernel;
+class CLCompileContext;
+class CLMemsetKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions:
*
@@ -55,7 +58,7 @@ public:
/** Allow instances of this class to be moved */
CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default;
/** Default destructor */
- virtual ~CLDeconvolutionLayerUpsample() = default;
+ ~CLDeconvolutionLayerUpsample();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
@@ -86,9 +89,9 @@ public:
void run() override;
private:
- CLDeconvolutionLayerUpsampleKernel _upsample;
- CLMemsetKernel _memset;
- ICLTensor *_output;
+ std::unique_ptr<CLDeconvolutionLayerUpsampleKernel> _upsample;
+ std::unique_ptr<CLMemsetKernel> _memset;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index d125584c97..b0f297aec5 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLDepthConvertLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
index 5e197cb9b8..a0aa288dbf 100644
--- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthToSpaceLayerKernel. */
class CLDepthToSpaceLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 558c4540fa..8e594bc09f 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -24,12 +24,6 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
@@ -38,6 +32,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLDepthwiseConvolutionLayerNativeKernel;
+class CLDepthwiseConvolutionLayerReshapeWeightsKernel;
+class ICLDepthwiseConvolutionLayer3x3Kernel;
class ICLTensor;
/** Function to execute a depthwise convolution
@@ -55,6 +54,8 @@ public:
CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
@@ -211,25 +212,25 @@ private:
};
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- CLFillBorderKernel _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- CLTensor _output_multipliers;
- CLTensor _output_shifts;
- const ITensor *_original_weights;
- const ITensor *_input;
- const ITensor *_output;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
- bool _is_quantized;
+ MemoryGroup _memory_group;
+ std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ CLTensor _output_multipliers;
+ CLTensor _output_shifts;
+ const ITensor *_original_weights;
+ const ITensor *_input;
+ const ITensor *_output;
+ bool _needs_permute;
+ bool _needs_weights_reshape;
+ bool _is_prepared;
+ bool _is_quantized;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
@@ -313,10 +314,10 @@ private:
private:
MemoryGroup _memory_group;
- CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLPermute _permute_output_to_nchw;
+ std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
CLTensor _permuted_input;
CLTensor _permuted_weights;
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 88ed915421..b2cf3356f4 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */
class CLDequantizationLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
index 1aba6a9f6c..4a91d5d50b 100644
--- a/arm_compute/runtime/CL/functions/CLDerivative.h
+++ b/arm_compute/runtime/CL/functions/CLDerivative.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute first order derivative operator. This function calls the following CL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h
index adb9cf4e6c..bf72cd3b26 100644
--- a/arm_compute/runtime/CL/functions/CLDilate.h
+++ b/arm_compute/runtime/CL/functions/CLDilate.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute dilate. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 8107fa24f3..0afc9d3f38 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/IFunction.h"
@@ -34,7 +32,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLDirectConvolutionLayerKernel;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute direct convolution function:
*/
@@ -43,6 +45,12 @@ class CLDirectConvolutionLayer : public IFunction
public:
/** Default constructor */
CLDirectConvolutionLayer();
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer(const CLDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer &operator=(const CLDirectConvolutionLayer &) = delete;
+ /** Default destructor */
+ ~CLDirectConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -95,9 +103,9 @@ public:
void run() override;
private:
- CLDirectConvolutionLayerKernel _direct_conv_kernel;
- CLFillBorderKernel _input_border_handler;
- CLActivationLayer _activationlayer_function;
+ std::unique_ptr<CLDirectConvolutionLayerKernel> _direct_conv_kernel;
+ std::unique_ptr<CLFillBorderKernel> _input_border_handler;
+ CLActivationLayer _activationlayer_function;
bool _is_activationlayer_enabled;
};
diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
index 5208bfe404..72b5b7dee8 100644
--- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
+++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform inverse square root on an input tensor. */
class CLRsqrtLayer : public IFunction
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 31d4f2e745..55c5fb3455 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -30,6 +30,8 @@
namespace arm_compute
{
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
index 883f330b33..17352d1a9b 100644
--- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
@@ -24,16 +24,19 @@
#ifndef ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
#define ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/runtime/CL/CLDistribution1D.h"
#include "arm_compute/runtime/CL/CLLut.h"
#include "arm_compute/runtime/IFunction.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLHistogramKernel;
+class CLHistogramBorderKernel;
+class CLTableLookupKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -48,6 +51,12 @@ class CLEqualizeHistogram : public IFunction
public:
/** Default Constructor. */
CLEqualizeHistogram();
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram(const CLEqualizeHistogram &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram &operator=(const CLEqualizeHistogram &) = delete;
+ /** Default destructor */
+ ~CLEqualizeHistogram();
/** Initialise the kernel's inputs.
*
* @param[in] input Input image. Data types supported: U8.
@@ -66,14 +75,14 @@ public:
void run() override;
private:
- CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
- CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- CLDistribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- CLLut _cd_lut; /**< Holds the equalization lookuptable. */
- static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
- static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
+ std::unique_ptr<CLHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
+ std::unique_ptr<CLHistogramBorderKernel> _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
+ std::unique_ptr<CLTableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
+ CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
+ CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */
+ CLLut _cd_lut; /**< Holds the equalization lookup table. */
+ static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
+ static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
};
}
#endif /*ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H */
diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
index f8f1c72bc0..9d799bc91e 100644
--- a/arm_compute/runtime/CL/functions/CLErode.h
+++ b/arm_compute/runtime/CL/functions/CLErode.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute erode. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index a6a35ab320..31a2cc6b06 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -26,9 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -36,6 +33,9 @@
namespace arm_compute
{
// Forward declaration
+class CLFFTDigitReverseKernel;
+class CLFFTRadixStageKernel;
+class CLFFTScaleKernel;
class ICLTensor;
/** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels:
@@ -49,6 +49,12 @@ class CLFFT1D : public IFunction
public:
/** Default Constructor */
CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT1D(const CLFFT1D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT1D &operator=(const CLFFT1D &) = delete;
+ /** Default destructor */
+ ~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
@@ -78,14 +84,14 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group;
- CLFFTDigitReverseKernel _digit_reverse_kernel;
- std::vector<CLFFTRadixStageKernel> _fft_kernels;
- CLFFTScaleKernel _scale_kernel;
- CLTensor _digit_reversed_input;
- CLTensor _digit_reverse_indices;
- unsigned int _num_ffts;
- bool _run_scale;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFFTDigitReverseKernel> _digit_reverse_kernel;
+ std::vector<std::unique_ptr<CLFFTRadixStageKernel>> _fft_kernels;
+ std::unique_ptr<CLFFTScaleKernel> _scale_kernel;
+ CLTensor _digit_reversed_input;
+ CLTensor _digit_reverse_indices;
+ unsigned int _num_ffts;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFFT1D_H */
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index 9ceebeaa32..126944b323 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -46,6 +46,12 @@ class CLFFT2D : public IFunction
public:
/** Default Constructor */
CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT2D(const CLFFT2D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT2D &operator=(const CLFFT2D &) = delete;
+ /** Default destructor */
+ ~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
index 698cc67995..e110582c50 100644
--- a/arm_compute/runtime/CL/functions/CLFastCorners.h
+++ b/arm_compute/runtime/CL/functions/CLFastCorners.h
@@ -25,7 +25,6 @@
#define ARM_COMPUTE_CLFASTCORNERS_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLArray.h"
@@ -40,6 +39,8 @@
namespace arm_compute
{
+class CLFastCornersKernel;
+class CLCopyToArrayKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -59,6 +60,8 @@ public:
CLFastCorners(const CLFastCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLFastCorners &operator=(const CLFastCorners &) = delete;
+ /** Default destructor */
+ ~CLFastCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in] input Source image. Data types supported: U8.
@@ -88,18 +91,18 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLFastCornersKernel _fast_corners_kernel;
- CLNonMaximaSuppression3x3 _suppr_func;
- CLCopyToArrayKernel _copy_array_kernel;
- CLImage _output;
- CLImage _suppr;
- Window _win;
- bool _non_max;
- unsigned int *_num_corners;
- cl::Buffer _num_buffer;
- ICLKeyPointArray *_corners;
- uint8_t _constant_border_value;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFastCornersKernel> _fast_corners_kernel;
+ CLNonMaximaSuppression3x3 _suppr_func;
+ std::unique_ptr<CLCopyToArrayKernel> _copy_array_kernel;
+ CLImage _output;
+ CLImage _suppr;
+ Window _win;
+ bool _non_max;
+ unsigned int *_num_corners;
+ cl::Buffer _num_buffer;
+ ICLKeyPointArray *_corners;
+ uint8_t _constant_border_value;
};
}
#endif /*ARM_COMPUTE_CLFASTCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index b79b234158..fef8324432 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Function to run @ref CLMemsetKernel to fill a tensor with a scalar value */
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index 18bc20e654..a4ad82dfd4 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -30,6 +30,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLFillBorderKernel */
diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index b8139c2260..f5f4ff554f 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute flatten. This function calls the following OpenCL kernel:
*
diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h
index 93c3639f89..85d7071194 100644
--- a/arm_compute/runtime/CL/functions/CLFloor.h
+++ b/arm_compute/runtime/CL/functions/CLFloor.h
@@ -30,7 +30,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLFloorKernel */
class CLFloor : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 29788742d7..3f17e4a921 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -26,7 +26,6 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
index de6d5617c2..e35905fcf1 100644
--- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
+++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
@@ -24,14 +24,18 @@
#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFuseBatchNormalizationKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to fuse the batch normalization node to a preceding convolution node */
class CLFuseBatchNormalization : public IFunction
@@ -48,7 +52,7 @@ public:
/** Allow instances of this class to be moved */
CLFuseBatchNormalization &operator=(CLFuseBatchNormalization &&) = default;
/** Default destructor */
- ~CLFuseBatchNormalization() = default;
+ ~CLFuseBatchNormalization();
/** Set the input and output tensors.
*
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -112,7 +116,7 @@ public:
void run() override;
private:
- CLFuseBatchNormalizationKernel _fuse_bn_kernel;
+ std::unique_ptr<CLFuseBatchNormalizationKernel> _fuse_bn_kernel;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 92f9736e35..0b13e7dbbf 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -24,11 +24,6 @@
#ifndef ARM_COMPUTE_CLGEMM_H
#define ARM_COMPUTE_CLGEMM_H
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/IFunction.h"
@@ -36,9 +31,18 @@
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLGEMMReshapeRHSMatrixKernel;
+class CLGEMMMatrixMultiplyKernel;
+class CLGEMMMatrixMultiplyReshapedKernel;
+class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMReshapeLHSMatrixKernel;
class ICLTensor;
+class ITensorInfo;
namespace weights_transformations
{
@@ -46,41 +50,36 @@ namespace weights_transformations
class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights
{
public:
+ /** Default constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move assignment operator */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+ /** Default destructor */
+ ~CLGEMMReshapeRHSMatrixKernelManaged();
//Inherited method override
- void run() override
- {
- _output.allocator()->allocate();
- CLScheduler::get().enqueue(_kernel, false);
- _reshape_run = true;
- }
+ void run() override;
//Inherited method override
- void release() override
- {
- _output.allocator()->free();
- }
+ void release() override;
//Inherited method override
- ICLTensor *get_weights() override
- {
- return &_output;
- }
+ ICLTensor *get_weights() override;
//Inherited method override
- uint32_t uid() override
- {
- return _uid;
- }
+ uint32_t uid() override;
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- configure(CLKernelLibrary::get().get_compile_context(), input, info);
- }
+ void configure(const ICLTensor *input, GEMMRHSMatrixInfo info);
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
@@ -88,15 +87,12 @@ public:
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- _kernel.configure(compile_context, input, &_output, info);
- }
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info);
private:
- static constexpr uint32_t _uid = 0x15;
- CLTensor _output{};
- CLGEMMReshapeRHSMatrixKernel _kernel{};
+ static constexpr uint32_t _uid{ 0x15 };
+ CLTensor _output{};
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _kernel;
};
} // namespace weights_transformations
@@ -126,6 +122,8 @@ public:
CLGEMM &operator=(const CLGEMM &) = delete;
/** Default move assignment operator */
CLGEMM &operator=(CLGEMM &&) = default;
+ /** Default destructor */
+ ~CLGEMM();
/** Initialise the kernel's inputs and output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -198,24 +196,24 @@ private:
static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
- weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged _reshape_rhs_kernel_managed;
- CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_fallback_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- const ICLTensor *_lhs;
- ICLTensor *_dst;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _has_pad_y;
- CLGEMMKernelType _gemm_kernel_type;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ std::unique_ptr<CLGEMMMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLGEMMReshapeLHSMatrixKernel> _reshape_lhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _reshape_rhs_kernel;
+ std::unique_ptr<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged> _reshape_rhs_kernel_managed;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_fallback_kernel;
+ CLTensor _tmp_a;
+ CLTensor _tmp_b;
+ const ICLTensor *_original_b;
+ const ICLTensor *_lhs;
+ ICLTensor *_dst;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+ bool _has_pad_y;
+ CLGEMMKernelType _gemm_kernel_type;
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 467045cd86..340ac6e749 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -26,9 +26,7 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
@@ -43,6 +41,9 @@
namespace arm_compute
{
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
class ICLTensor;
/** Function to reshape and transpose the weights. This function calls the following kernels:
@@ -53,6 +54,16 @@ class CLConvolutionLayerReshapeWeights : public IFunction
public:
/** Constructor */
CLConvolutionLayerReshapeWeights();
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights(const CLConvolutionLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights &operator=(const CLConvolutionLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights(CLConvolutionLayerReshapeWeights &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights &operator=(CLConvolutionLayerReshapeWeights &&) = delete;
+ /** Default destructor */
+ ~CLConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
@@ -87,7 +98,7 @@ public:
void run() override;
private:
- CLWeightsReshapeKernel _weights_reshape_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
};
namespace weights_transformations
@@ -179,6 +190,8 @@ public:
CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLGEMMConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -288,10 +301,10 @@ private:
IWeightsManager *_weights_manager;
CLConvolutionLayerReshapeWeights _reshape_weights;
weights_transformations::CLConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- CLIm2ColKernel _im2col_kernel;
+ std::unique_ptr<CLIm2ColKernel> _im2col_kernel;
CLGEMM _mm_gemm;
CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLCol2ImKernel _col2im_kernel;
+ std::unique_ptr<CLCol2ImKernel> _col2im_kernel;
CLActivationLayer _activationlayer_function;
const ICLTensor *_original_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
index 1fedeff444..32af0f9427 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
@@ -40,6 +39,7 @@
namespace arm_compute
{
+class CLDeconvolutionReshapeOutputKernel;
class ICLTensor;
/** Function to run the deconvolution layer through a call to GEMM.
*
@@ -89,6 +89,8 @@ public:
CLGEMMDeconvolutionLayer &operator=(const CLGEMMDeconvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMDeconvolutionLayer &operator=(CLGEMMDeconvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLGEMMDeconvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
@@ -130,15 +132,15 @@ public:
private:
MemoryGroup _memory_group;
- CLGEMM _mm_gemm;
- CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLGEMMLowpOutputStage _gemmlowp_output_stage;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLReshapeLayer _reshape_weights;
- CLTranspose _transpose_weights;
- CLDeconvolutionReshapeOutputKernel _deconv_reshape;
- CLSlice _slice_gemm;
+ CLGEMM _mm_gemm;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLGEMMLowpOutputStage _gemmlowp_output_stage;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLReshapeLayer _reshape_weights;
+ CLTranspose _transpose_weights;
+ std::unique_ptr<CLDeconvolutionReshapeOutputKernel> _deconv_reshape;
+ CLSlice _slice_gemm;
CLTensor _gemmlowp_final;
CLTensor _reshaped_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 57b1e30df5..4cc8899690 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -24,21 +24,24 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
+class CLCompileContext;
class IMemoryManager;
class ICLTensor;
+class ITensorInfo;
+class CLDepthConvertLayerKernel;
+class CLGEMMLowpMatrixMultiplyNativeKernel;
+class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMLowpOffsetContributionKernel;
+class CLGEMMLowpOffsetContributionOutputStageKernel;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLGEMMLowpMatrixBReductionKernel;
+class CLGEMMReshapeRHSMatrixKernel;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
class CLGEMMLowpMatrixMultiplyCore : public IFunction
@@ -54,6 +57,8 @@ public:
CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~CLGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
@@ -112,14 +117,14 @@ private:
MemoryGroup _memory_group;
// Kernels used
- CLDepthConvertLayerKernel _weights_to_qasymm8;
- CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel;
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- CLGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+ std::unique_ptr<CLDepthConvertLayerKernel> _weights_to_qasymm8;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
// Temporary tensors
CLTensor _qasymm8_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index 44c52ffb79..a4edab9b8f 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -24,8 +24,11 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include <limits>
+
/** This file contains all available output stages for GEMMLowp on OpenCL.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
@@ -36,7 +39,11 @@
namespace arm_compute
{
+class CLCompileContext;
class ITensor;
+class ICLTensor;
+class ITensorInfo;
+struct GEMMLowpOutputStageInfo;
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
*
diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h
index e87a120ba1..9c659be6fc 100644
--- a/arm_compute/runtime/CL/functions/CLGather.h
+++ b/arm_compute/runtime/CL/functions/CLGather.h
@@ -25,11 +25,14 @@
#ifndef ARM_COMPUTE_CLGATHER_H
#define ARM_COMPUTE_CLGATHER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLGatherKernel */
class CLGather : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
index 9fe3e9bb00..286a17618b 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
index fb369d750b..cf5b79eaac 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLGAUSSIAN5X5_H
#define ARM_COMPUTE_CLGAUSSIAN5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGaussian5x5HorKernel;
+class CLGaussian5x5VertKernel;
class ICLTensor;
/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels:
@@ -54,6 +56,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5(const CLGaussian5x5 &) = delete;
+ /** Default move constructor */
+ CLGaussian5x5(CLGaussian5x5 &&) = default;
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5 &operator=(const CLGaussian5x5 &) = delete;
+ /** Default move assignment operator */
+ CLGaussian5x5 &operator=(CLGaussian5x5 &&) = default;
+ /** Default destructor */
+ ~CLGaussian5x5();
/** Initialise the function's source, destinations and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -76,11 +88,11 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */
- CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp; /**< Temporary buffer */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLGaussian5x5HorKernel> _kernel_hor; /**< Horizontal pass kernel */
+ std::unique_ptr<CLGaussian5x5VertKernel> _kernel_vert; /**< Vertical pass kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp; /**< Temporary buffer */
};
}
#endif /*ARM_COMPUTE_CLGAUSSIAN5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
index 70f324be11..b18e5f98f0 100644
--- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
+++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMID_H
#define ARM_COMPUTE_CLGAUSSIANPYRAMID_H
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLPyramid.h"
@@ -38,7 +35,12 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class CLGaussianPyramidHorKernel;
+class CLGaussianPyramidVertKernel;
+class CLScaleKernel;
/** Common interface for all Gaussian pyramid functions */
class CLGaussianPyramid : public IFunction
@@ -55,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default;
/** Default destructor */
- virtual ~CLGaussianPyramid() = default;
+ ~CLGaussianPyramid();
/** Initialise the function's source, destinations and border mode.
*
* @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -93,6 +95,12 @@ class CLGaussianPyramidHalf : public CLGaussianPyramid
public:
/** Constructor */
CLGaussianPyramidHalf();
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf(const CLGaussianPyramidHalf &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf &operator=(const CLGaussianPyramidHalf &) = delete;
+ /** Default destructor */
+ ~CLGaussianPyramidHalf();
// Inherited methods overridden:
void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
@@ -100,10 +108,10 @@ public:
void run() override;
private:
- std::vector<CLFillBorderKernel> _horizontal_border_handler;
- std::vector<CLFillBorderKernel> _vertical_border_handler;
- std::vector<CLGaussianPyramidHorKernel> _horizontal_reduction;
- std::vector<CLGaussianPyramidVertKernel> _vertical_reduction;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _horizontal_border_handler;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _vertical_border_handler;
+ std::vector<std::unique_ptr<CLGaussianPyramidHorKernel>> _horizontal_reduction;
+ std::vector<std::unique_ptr<CLGaussianPyramidVertKernel>> _vertical_reduction;
};
/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
@@ -124,8 +132,8 @@ public:
void run() override;
private:
- std::vector<CLGaussian5x5> _gauss5x5;
- std::vector<CLScaleKernel> _scale_nearest;
+ std::vector<CLGaussian5x5> _gauss5x5;
+ std::vector<std::unique_ptr<CLScaleKernel>> _scale_nearest;
};
}
#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMID_H */
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 6d5f2e5d71..0fb9a06c84 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -23,12 +23,7 @@
*/
#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -38,9 +33,19 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class CLDequantizationLayerKernel;
+class CLComputeAllAnchorsKernel;
+class CLPadLayerKernel;
+class CLPermuteKernel;
+class CLQuantizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to generate proposals for a RPN (Region Proposal Network)
*
@@ -67,6 +72,8 @@ public:
CLGenerateProposalsLayer(const CLGenerateProposalsLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLGenerateProposalsLayer &operator=(const CLGenerateProposalsLayer &) = delete;
+ /** Default destructor */
+ ~CLGenerateProposalsLayer();
/** Set the input and output tensors.
*
@@ -130,16 +137,16 @@ private:
MemoryGroup _memory_group;
// OpenCL kernels
- CLPermuteKernel _permute_deltas_kernel;
- CLReshapeLayer _flatten_deltas;
- CLPermuteKernel _permute_scores_kernel;
- CLReshapeLayer _flatten_scores;
- CLComputeAllAnchorsKernel _compute_anchors_kernel;
- CLBoundingBoxTransformKernel _bounding_box_kernel;
- CLPadLayerKernel _pad_kernel;
- CLDequantizationLayerKernel _dequantize_anchors;
- CLDequantizationLayerKernel _dequantize_deltas;
- CLQuantizationLayerKernel _quantize_all_proposals;
+ std::unique_ptr<CLPermuteKernel> _permute_deltas_kernel;
+ CLReshapeLayer _flatten_deltas;
+ std::unique_ptr<CLPermuteKernel> _permute_scores_kernel;
+ CLReshapeLayer _flatten_scores;
+ std::unique_ptr<CLComputeAllAnchorsKernel> _compute_anchors_kernel;
+ std::unique_ptr<CLBoundingBoxTransformKernel> _bounding_box_kernel;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_anchors;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_deltas;
+ std::unique_ptr<CLQuantizationLayerKernel> _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
index dad7e6edf8..fa37b3c84e 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLHOGDESCRIPTOR_H
#define ARM_COMPUTE_CLHOGDESCRIPTOR_H
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
@@ -37,6 +36,8 @@
namespace arm_compute
{
class IHOG;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
*
* -# @ref CLHOGGradient
@@ -49,6 +50,12 @@ class CLHOGDescriptor : public IFunction
public:
/** Default constructor */
CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor(const CLHOGDescriptor &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor &operator=(const CLHOGDescriptor &) = delete;
+ /** Default destructor */
+ ~CLHOGDescriptor();
/** Initialise the function's source, destination, HOG data-object and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8
@@ -75,13 +82,13 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient;
- CLHOGOrientationBinningKernel _orient_bin;
- CLHOGBlockNormalizationKernel _block_norm;
- CLTensor _mag;
- CLTensor _phase;
- CLTensor _hog_space;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient;
+ std::unique_ptr<CLHOGOrientationBinningKernel> _orient_bin;
+ std::unique_ptr<CLHOGBlockNormalizationKernel> _block_norm;
+ CLTensor _mag;
+ CLTensor _phase;
+ CLTensor _hog_space;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
index 6697b5c24d..edc5b652d3 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDetector.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -24,13 +24,20 @@
#ifndef ARM_COMPUTE_CLHOGDETECTOR_H
#define ARM_COMPUTE_CLHOGDETECTOR_H
+#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/IHOG.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGDetectorKernel;
+class ICLTensor;
+class ICLHOG;
+
/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
*
* -# @ref CLHOGDetectorKernel
@@ -50,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLHOGDetector &operator=(CLHOGDetector &&) = default;
/** Default destructor */
- ~CLHOGDetector() = default;
+ ~CLHOGDetector();
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
*
* @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
@@ -78,16 +85,16 @@ public:
* @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
- float threshold = 0.0f,
+ float threshold = 0.0f,
size_t idx_class = 0);
// Inherited methods overridden:
void run() override;
private:
- CLHOGDetectorKernel _hog_detector_kernel;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer _num_detection_windows;
+ std::unique_ptr<CLHOGDetectorKernel> _hog_detector_kernel;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer _num_detection_windows;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
index b0589027e7..39d26fb110 100644
--- a/arm_compute/runtime/CL/functions/CLHOGGradient.h
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLHOGGRADIENT_H
#define ARM_COMPUTE_CLHOGGRADIENT_H
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
@@ -39,6 +36,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLMagnitudePhaseKernel;
+class ITensorInfo;
/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
*
* -# @ref CLDerivative
@@ -79,11 +79,11 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLDerivative _derivative;
- CLMagnitudePhaseKernel _mag_phase;
- CLTensor _gx;
- CLTensor _gy;
+ MemoryGroup _memory_group;
+ CLDerivative _derivative;
+ std::unique_ptr<CLMagnitudePhaseKernel> _mag_phase;
+ CLTensor _gx;
+ CLTensor _gy;
};
}
#endif /*ARM_COMPUTE_CLHOGGRADIENT_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
index e7631c2c5a..2a2c9a0a5c 100644
--- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLMultiHOG.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
@@ -39,6 +38,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
*
* -# @ref CLHOGGradient
@@ -62,6 +64,8 @@ public:
CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+ /** Default destructor */
+ ~CLHOGMultiDetection();
/** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
*
* @param[in, out] input Input tensor. Data type supported: U8
@@ -110,21 +114,21 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient_kernel;
- std::vector<CLHOGOrientationBinningKernel> _orient_bin_kernel;
- std::vector<CLHOGBlockNormalizationKernel> _block_norm_kernel;
- std::vector<CLHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<CLTensor> _hog_space;
- std::vector<CLTensor> _hog_norm_space;
- ICLDetectionWindowArray *_detection_windows;
- CLTensor _mag;
- CLTensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient_kernel;
+ std::vector<std::unique_ptr<CLHOGOrientationBinningKernel>> _orient_bin_kernel;
+ std::vector<std::unique_ptr<CLHOGBlockNormalizationKernel>> _block_norm_kernel;
+ std::vector<CLHOGDetector> _hog_detect_kernel;
+ CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
+ std::vector<CLTensor> _hog_space;
+ std::vector<CLTensor> _hog_norm_space;
+ ICLDetectionWindowArray *_detection_windows;
+ CLTensor _mag;
+ CLTensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
index 326a895d39..c9c67f5a28 100644
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -24,16 +24,13 @@
#ifndef ARM_COMPUTE_CLHARRISCORNERS_H
#define ARM_COMPUTE_CLHARRISCORNERS_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include <cstdint>
@@ -41,6 +38,9 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLHarrisScoreKernel;
+class CLFillBorderKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -66,6 +66,8 @@ public:
CLHarrisCorners(const CLHarrisCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
+ /** Default destructor */
+ ~CLHarrisCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -104,21 +106,21 @@ public:
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- CLHarrisScoreKernel _harris_score; /**< Harris score kernel */
- CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- CLFillBorderKernel _border_gx; /**< Border handler before running harris score */
- CLFillBorderKernel _border_gy; /**< Border handler before running harris score */
- CLImage _gx; /**< Source image - Gx component */
- CLImage _gy; /**< Source image - Gy component */
- CLImage _score; /**< Source image - Harris score */
- CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
- ICLKeyPointArray *_corners; /**< Output corners array */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Sobel function */
+ std::unique_ptr<CLHarrisScoreKernel> _harris_score; /**< Harris score kernel */
+ CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
+ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
+ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_gx; /**< Border handler before running harris score */
+ std::unique_ptr<CLFillBorderKernel> _border_gy; /**< Border handler before running harris score */
+ CLImage _gx; /**< Source image - Gx component */
+ CLImage _gy; /**< Source image - Gy component */
+ CLImage _score; /**< Source image - Harris score */
+ CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
+ std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
+ int32_t _num_corner_candidates; /**< Number of potential corner candidates */
+ ICLKeyPointArray *_corners; /**< Output corners array */
};
}
#endif /*ARM_COMPUTE_CLHARRISCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
index 7fdb8a9022..164bd0a28a 100644
--- a/arm_compute/runtime/CL/functions/CLHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_CLHISTOGRAM_H
#define ARM_COMPUTE_CLHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
namespace arm_compute
{
diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
index d7aa11cbc8..d41f3fedf6 100644
--- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
index 6b10ede650..0ecdbde8fe 100644
--- a/arm_compute/runtime/CL/functions/CLIntegralImage.h
+++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h
@@ -24,11 +24,15 @@
#ifndef ARM_COMPUTE_CLINTEGRALIMAGE_H
#define ARM_COMPUTE_CLINTEGRALIMAGE_H
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLIntegralImageHorKernel;
+class CLIntegralImageVertKernel;
class ICLTensor;
/** Basic function to execute integral image. This function calls the following OpenCL kernels:
@@ -42,6 +46,12 @@ class CLIntegralImage : public IFunction
public:
/** Default Constructor. */
CLIntegralImage();
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage(const CLIntegralImage &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage &operator=(const CLIntegralImage &) = delete;
+ /** Default destructor */
+ ~CLIntegralImage();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: U8.
@@ -60,8 +70,8 @@ public:
void run() override;
protected:
- CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */
- CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */
+ std::unique_ptr<CLIntegralImageHorKernel> _integral_hor; /**< Integral Image Horizontal kernel */
+ std::unique_ptr<CLIntegralImageVertKernel> _integral_vert; /**< Integral Image Vertical kernel */
};
}
#endif /*ARM_COMPUTE_CLINTEGRALIMAGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
index bc79101d9d..401d249eb4 100644
--- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLL2NORMALIZELAYER_H
#define ARM_COMPUTE_CLL2NORMALIZELAYER_H
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
@@ -37,7 +36,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLL2NormalizeLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a L2 normalization on a given axis.
*
@@ -50,6 +52,16 @@ class CLL2NormalizeLayer : public IFunction
public:
/** Constructor */
CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLL2NormalizeLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer(const CLL2NormalizeLayer &) = delete;
+ /** Default move constructor */
+ CLL2NormalizeLayer(CLL2NormalizeLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer &operator=(const CLL2NormalizeLayer &) = delete;
+ /** Default move assignment operator */
+ CLL2NormalizeLayer &operator=(CLL2NormalizeLayer &&) = default;
/** Set the input and output tensors.
*
@@ -84,10 +96,10 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLReductionOperation _reduce_func;
- CLL2NormalizeLayerKernel _normalize_kernel;
- CLTensor _sumsq;
+ MemoryGroup _memory_group;
+ CLReductionOperation _reduce_func;
+ std::unique_ptr<CLL2NormalizeLayerKernel> _normalize_kernel;
+ CLTensor _sumsq;
};
}
#endif /*ARM_COMPUTE_CLL2NORMALIZELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 1a8b33463d..017f26aa1e 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -26,8 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
@@ -45,6 +43,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLCopyKernel;
+class CLMemsetKernel;
+class CLTransposeKernel;
class ICLTensor;
/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
@@ -55,6 +57,16 @@ class CLLSTMLayer : public IFunction
public:
/** Default constructor */
CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer &operator=(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLLSTMLayer(CLLSTMLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLLSTMLayer &operator=(CLLSTMLayer &&) = delete;
+ /** Default destructor */
+ ~CLLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
@@ -200,90 +212,90 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLFullyConnectedLayer _fully_connected_input_gate;
- CLArithmeticAddition _accum_input_gate1;
- CLArithmeticSubtraction _subtract_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate;
- CLActivationLayer _activation_input_gate;
- CLFullyConnectedLayer _fully_connected_forget_gate;
- CLArithmeticAddition _accum_forget_gate1;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
- CLActivationLayer _activation_forget_gate;
- CLFullyConnectedLayer _fully_connected_cell_state;
- CLGEMM _gemm_cell_state1;
- CLTransposeKernel _transpose_cell_state;
- CLArithmeticAddition _accum_cell_state1;
- CLArithmeticAddition _accum_cell_state2;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
- CLActivationLayer _activation_cell_state;
- CLActivationLayer _cell_clip;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
- CLFullyConnectedLayer _fully_connected_output;
- CLPixelWiseMultiplication _pixelwise_mul_output_state1;
- CLArithmeticAddition _accum_output1;
- CLActivationLayer _activation_output;
- CLActivationLayer _activation_output_state;
- CLPixelWiseMultiplication _pixelwise_mul_output_state2;
- CLFullyConnectedLayer _fully_connected_output_state;
- CLActivationLayer _projection_clip;
- CLCopyKernel _copy_cell_state;
- CLCopyKernel _copy_output;
- CLConcatenateLayer _concat_scratch_buffer;
- CLConcatenateLayer _concat_inputs_forget_gate;
- CLConcatenateLayer _concat_weights_forget_gate;
- CLConcatenateLayer _concat_weights_input_gate;
- CLConcatenateLayer _concat_weights_output;
- CLMemsetKernel _ones_memset_kernel;
- CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
- CLArithmeticAddition _accum_input_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
- CLArithmeticAddition _accum_forget_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
- CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
- CLArithmeticAddition _accum_cell_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
- CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
- CLArithmeticAddition _accum_output_gate_bias;
- CLTensor _input_gate_out1;
- CLTensor _input_gate_out2;
- CLTensor _input_gate_out3;
- CLTensor _input_gate_out4;
- CLTensor _forget_gate_out1;
- CLTensor _forget_gate_out2;
- CLTensor _forget_gate_out3;
- CLTensor _forget_gate_out4;
- CLTensor _forget_gate_out5;
- CLTensor _forget_gate_out6;
- CLTensor _cell_state_out1;
- CLTensor _cell_state_out2;
- CLTensor _cell_state_out3;
- CLTensor _cell_state_out4;
- CLTensor _cell_state_out5;
- CLTensor _output1;
- CLTensor _output2;
- CLTensor _output3;
- CLTensor _output4;
- CLTensor _cell_state_activation;
- CLTensor _output_state1;
- CLTensor _ones;
- CLTensor _input_layer_norm_out1;
- CLTensor _input_layer_norm_out2;
- CLTensor _forget_layer_norm_out1;
- CLTensor _forget_layer_norm_out2;
- CLTensor _cell_layer_norm_out1;
- CLTensor _cell_layer_norm_out2;
- CLTensor _output_layer_norm_out1;
- CLTensor _output_layer_norm_out2;
- bool _run_peephole_opt;
- bool _run_cifg_opt;
- bool _perform_cell_clipping;
- bool _has_projection_weights;
- bool _perform_projection_clipping;
- bool _is_prepared;
- bool _is_layer_norm_lstm;
+ MemoryGroup _memory_group;
+ CLFullyConnectedLayer _fully_connected_input_gate;
+ CLArithmeticAddition _accum_input_gate1;
+ CLArithmeticSubtraction _subtract_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate;
+ CLActivationLayer _activation_input_gate;
+ CLFullyConnectedLayer _fully_connected_forget_gate;
+ CLArithmeticAddition _accum_forget_gate1;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
+ CLActivationLayer _activation_forget_gate;
+ CLFullyConnectedLayer _fully_connected_cell_state;
+ CLGEMM _gemm_cell_state1;
+ std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
+ CLArithmeticAddition _accum_cell_state1;
+ CLArithmeticAddition _accum_cell_state2;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
+ CLActivationLayer _activation_cell_state;
+ CLActivationLayer _cell_clip;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
+ CLFullyConnectedLayer _fully_connected_output;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state1;
+ CLArithmeticAddition _accum_output1;
+ CLActivationLayer _activation_output;
+ CLActivationLayer _activation_output_state;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state2;
+ CLFullyConnectedLayer _fully_connected_output_state;
+ CLActivationLayer _projection_clip;
+ std::unique_ptr<CLCopyKernel> _copy_cell_state;
+ std::unique_ptr<CLCopyKernel> _copy_output;
+ CLConcatenateLayer _concat_scratch_buffer;
+ CLConcatenateLayer _concat_inputs_forget_gate;
+ CLConcatenateLayer _concat_weights_forget_gate;
+ CLConcatenateLayer _concat_weights_input_gate;
+ CLConcatenateLayer _concat_weights_output;
+ std::unique_ptr<CLMemsetKernel> _ones_memset_kernel;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
+ CLArithmeticAddition _accum_input_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
+ CLArithmeticAddition _accum_forget_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
+ CLArithmeticAddition _accum_cell_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
+ CLArithmeticAddition _accum_output_gate_bias;
+ CLTensor _input_gate_out1;
+ CLTensor _input_gate_out2;
+ CLTensor _input_gate_out3;
+ CLTensor _input_gate_out4;
+ CLTensor _forget_gate_out1;
+ CLTensor _forget_gate_out2;
+ CLTensor _forget_gate_out3;
+ CLTensor _forget_gate_out4;
+ CLTensor _forget_gate_out5;
+ CLTensor _forget_gate_out6;
+ CLTensor _cell_state_out1;
+ CLTensor _cell_state_out2;
+ CLTensor _cell_state_out3;
+ CLTensor _cell_state_out4;
+ CLTensor _cell_state_out5;
+ CLTensor _output1;
+ CLTensor _output2;
+ CLTensor _output3;
+ CLTensor _output4;
+ CLTensor _cell_state_activation;
+ CLTensor _output_state1;
+ CLTensor _ones;
+ CLTensor _input_layer_norm_out1;
+ CLTensor _input_layer_norm_out2;
+ CLTensor _forget_layer_norm_out1;
+ CLTensor _forget_layer_norm_out2;
+ CLTensor _cell_layer_norm_out1;
+ CLTensor _cell_layer_norm_out2;
+ CLTensor _output_layer_norm_out1;
+ CLTensor _output_layer_norm_out2;
+ bool _run_peephole_opt;
+ bool _run_cifg_opt;
+ bool _perform_cell_clipping;
+ bool _has_projection_weights;
+ bool _perform_projection_clipping;
+ bool _is_prepared;
+ bool _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index ba85c6140c..3bbf9f2c30 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -26,10 +26,6 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -39,7 +35,13 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
+class CLLocallyConnectedMatrixMultiplyKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
*
@@ -108,16 +110,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLIm2ColKernel _input_im2col_kernel;
- CLWeightsReshapeKernel _weights_reshape_kernel;
- CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
- CLCol2ImKernel _output_col2im_kernel;
- CLTensor _input_im2col_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _gemm_output;
- bool _is_prepared;
- const ICLTensor *_original_weights;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLIm2ColKernel> _input_im2col_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
+ std::unique_ptr<CLLocallyConnectedMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLCol2ImKernel> _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _gemm_output;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
};
}
#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
index ad7cc778e5..6ac141641c 100644
--- a/arm_compute/runtime/CL/functions/CLMagnitude.h
+++ b/arm_compute/runtime/CL/functions/CLMagnitude.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLMagnitudePhaseKernel. */
diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
index 5c8548f9e0..693862fb89 100644
--- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
@@ -24,14 +24,19 @@
#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
#define ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include <memory>
namespace arm_compute
{
-class ITensor;
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLMaxUnpoolingLayerKernel;
+class CLMemsetKernel;
+struct PoolingLayerInfo;
/** Function to perform MaxUnpooling. This function calls the following OpenCL kernels:
*
@@ -43,6 +48,12 @@ class CLMaxUnpoolingLayer : public IFunction
public:
/** Constructor */
CLMaxUnpoolingLayer();
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer(const CLMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer &operator=(const CLMaxUnpoolingLayer &) = delete;
+ /** Default destructor */
+ ~CLMaxUnpoolingLayer();
/** Set the input and output tensors.
*
* @note Output shape must be equal to the shape of the original input to pool.
@@ -88,8 +99,8 @@ public:
void run() override;
private:
- CLMemsetKernel _memset_kernel;
- CLMaxUnpoolingLayerKernel _unpooling_layer_kernel;
+ std::unique_ptr<CLMemsetKernel> _memset_kernel;
+ std::unique_ptr<CLMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
};
}
#endif /* ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
index be192a7c11..d9ced1393e 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -25,15 +25,20 @@
#define ARM_COMPUTE_CLMEANSTDDEV_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLFillBorderKernel;
+class CLMeanStdDevKernel;
/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
class CLMeanStdDev : public IFunction
{
@@ -49,7 +54,7 @@ public:
/** Allow instances of this class to be moved */
CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
/** Default destructor */
- ~CLMeanStdDev() = default;
+ ~CLMeanStdDev();
/** Initialise the kernel's inputs and outputs.
*
* @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
@@ -83,20 +88,20 @@ private:
void run_float();
void run_int();
- MemoryGroup _memory_group; /**< Function's memory group */
- DataType _data_type; /**< Input data type. */
- unsigned int _num_pixels; /**< Number of image's pixels. */
- bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
- CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
- CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
- CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
- CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
- float *_mean; /**< Pointer that holds the mean value. */
- float *_stddev; /**< Pointer that holds the standard deviation value. */
- CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ DataType _data_type; /**< Input data type. */
+ unsigned int _num_pixels; /**< Number of image's pixels. */
+ bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
+ CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
+ CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
+ CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
+ CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
+ float *_mean; /**< Pointer that holds the mean value. */
+ float *_stddev; /**< Pointer that holds the standard deviation value. */
+ std::unique_ptr<CLMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
+ std::unique_ptr<CLFillBorderKernel> _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
+ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*ARM_COMPUTE_CLMEANSTDDEV_H */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
index 1627de1ae8..cfe59eac09 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute mean and standard deviation normalization by calling @ref CLMeanStdDevNormalizationKernel */
class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
index 7f67f958c1..6c0458203e 100644
--- a/arm_compute/runtime/CL/functions/CLMedian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute median filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
index 04926f7bd0..4e3f28b006 100644
--- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
+++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
@@ -24,12 +24,16 @@
#ifndef ARM_COMPUTE_CLMINMAXLOCATION_H
#define ARM_COMPUTE_CLMINMAXLOCATION_H
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLMinMaxKernel;
+class CLMinMaxLocationKernel;
class ICLTensor;
using ICLImage = ICLTensor;
@@ -51,6 +55,8 @@ public:
CLMinMaxLocation(CLMinMaxLocation &&) = default;
/** Allow instances of this class to be moved */
CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
+ /** Default destructor */
+ ~CLMinMaxLocation();
/** Initialise the kernel's inputs and outputs.
*
* @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
@@ -87,16 +93,16 @@ public:
void run() override;
private:
- CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */
- CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
- cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
- cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Minimum value occurrences. */
- uint32_t *_max_count; /**< Maximum value occurrences. */
- CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
- CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
+ std::unique_ptr<CLMinMaxKernel> _min_max_kernel; /**< Kernel that performs min/max */
+ std::unique_ptr<CLMinMaxLocationKernel> _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
+ cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
+ cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */
+ void *_min; /**< Minimum value. */
+ void *_max; /**< Maximum value. */
+ uint32_t *_min_count; /**< Minimum value occurrences. */
+ uint32_t *_max_count; /**< Maximum value occurrences. */
+ CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
+ CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
};
}
#endif /*ARM_COMPUTE_CLMINMAXLOCATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
index 8b7e350e09..1b466bf662 100644
--- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
+++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
index 556de1c64c..c767a042ff 100644
--- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
+++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index a2d46b368f..389b21e5c8 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -24,18 +24,19 @@
#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLNORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLNormalizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a normalization layer. This function calls the following CL kernels:
*
@@ -48,6 +49,16 @@ class CLNormalizationLayer : public IFunction
public:
/** Default constructor */
CLNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer &operator=(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLNormalizationLayer(CLNormalizationLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLNormalizationLayer &operator=(CLNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLNormalizationLayer();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
@@ -85,8 +96,8 @@ public:
void run() override;
private:
- CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
+ std::unique_ptr<CLNormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
};
}
#endif /* ARM_COMPUTE_CLNORMALIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
index cf4a9b6497..de5155c65a 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLNormalizePlanarYUVLayerKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
index adce6748c8..0e34374aa5 100644
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLOPTICALFLOW_H
#define ARM_COMPUTE_CLOPTICALFLOW_H
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLArray.h"
@@ -41,7 +39,12 @@
namespace arm_compute
{
+class CLCompileContext;
class CLPyramid;
+class CLLKTrackerInitKernel;
+class CLLKTrackerStage0Kernel;
+class CLLKTrackerStage1Kernel;
+class CLLKTrackerFinalizeKernel;
/** OpenCL Array of Internal Keypoints */
using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
@@ -71,6 +74,8 @@ public:
CLOpticalFlow(CLOpticalFlow &&) = default;
/** Allow instances of this class to be moved */
CLOpticalFlow &operator=(CLOpticalFlow &&) = default;
+ /** Default destructor */
+ ~CLOpticalFlow();
/** Initialise the function input and output
*
* @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8
@@ -117,22 +122,22 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLLKTrackerInitKernel> _tracker_init_kernel;
- std::vector<CLLKTrackerStage0Kernel> _tracker_stage0_kernel;
- std::vector<CLLKTrackerStage1Kernel> _tracker_stage1_kernel;
- CLLKTrackerFinalizeKernel _tracker_finalize_kernel;
- std::vector<CLScharr3x3> _func_scharr;
- std::vector<CLTensor> _scharr_gx;
- std::vector<CLTensor> _scharr_gy;
- const ICLKeyPointArray *_old_points;
- const ICLKeyPointArray *_new_points_estimates;
- ICLKeyPointArray *_new_points;
- std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
- std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
- std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
- std::unique_ptr<CLOldValueArray> _old_values;
- size_t _num_levels;
+ MemoryGroup _memory_group;
+ std::vector<std::unique_ptr<CLLKTrackerInitKernel>> _tracker_init_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage0Kernel>> _tracker_stage0_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage1Kernel>> _tracker_stage1_kernel;
+ std::unique_ptr<CLLKTrackerFinalizeKernel> _tracker_finalize_kernel;
+ std::vector<CLScharr3x3> _func_scharr;
+ std::vector<CLTensor> _scharr_gx;
+ std::vector<CLTensor> _scharr_gy;
+ const ICLKeyPointArray *_old_points;
+ const ICLKeyPointArray *_new_points_estimates;
+ ICLKeyPointArray *_new_points;
+ std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
+ std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
+ std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
+ std::unique_ptr<CLOldValueArray> _old_values;
+ size_t _num_levels;
};
}
#endif /*ARM_COMPUTE_CLOPTICALFLOW_H */
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index ffde9ec186..ab32bccc24 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLPRELULAYER_H
#define ARM_COMPUTE_CLPRELULAYER_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h
index e3a923f81c..2bbde30fc2 100644
--- a/arm_compute/runtime/CL/functions/CLPadLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPadLayer.h
@@ -24,13 +24,15 @@
#ifndef ARM_COMPUTE_CLPADLAYER_H
#define ARM_COMPUTE_CLPADLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPadLayerKernel;
+class CLCopyKernel;
class ICLTensor;
/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
@@ -51,6 +53,8 @@ public:
CLPadLayer &operator=(const CLPadLayer &) = delete;
/** Default move assignment operator */
CLPadLayer &operator=(CLPadLayer &&) = default;
+ /** Default destructor */
+ ~CLPadLayer();
/** Initialize the function
*
@@ -95,9 +99,9 @@ public:
private:
void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
- CLPadLayerKernel _pad_kernel;
- CLCopyKernel _copy_kernel;
- bool _perform_pad;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ bool _perform_pad;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_PADLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index abc23eff0c..50e81da7c4 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute an @ref CLPermuteKernel. */
class CLPermute : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h
index 2731a08a52..34b8e72175 100644
--- a/arm_compute/runtime/CL/functions/CLPhase.h
+++ b/arm_compute/runtime/CL/functions/CLPhase.h
@@ -29,6 +29,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute an @ref CLMagnitudePhaseKernel. */
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 2066012306..6432cd040d 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -24,14 +24,16 @@
#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
// Forward declaration
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
@@ -106,7 +108,7 @@ public:
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */
@@ -139,7 +141,7 @@ public:
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
} // namespace experimental
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index 96dacf9322..ef1f426c22 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
*
diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
index 9a78e77307..9129bfd064 100644
--- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
@@ -24,13 +24,16 @@
#ifndef ARM_COMPUTE_CLPRIORBOXLAYER_H
#define ARM_COMPUTE_CLPRIORBOXLAYER_H
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPriorBoxLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLPriorBoxLayerKernel. */
class CLPriorBoxLayer : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index 6e537680ee..a8f9221b3d 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -24,9 +24,6 @@
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
@@ -40,7 +37,12 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLCopyKernel;
class ICLTensor;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLQLSTMLayerNormalizationKernel;
+class ITensorInfo;
/** Basic function to run @ref CLQLSTMLayer
*
@@ -68,6 +70,8 @@ public:
CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
/** Default move assignment operator */
CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
+ /** Default destructor */
+ ~CLQLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
@@ -285,72 +289,72 @@ private:
};
// Functions used
- CLTranspose _transpose_input_to_forget_weights{};
- CLTranspose _transpose_input_to_cell_weights{};
- CLTranspose _transpose_input_to_output_weights{};
- CLTranspose _transpose_input_to_input_weights{};
- CLTranspose _transpose_recurrent_to_forget_weights{};
- CLTranspose _transpose_recurrent_to_cell_weights{};
- CLTranspose _transpose_recurrent_to_output_weights{};
- CLTranspose _transpose_recurrent_to_input_weights{};
- CLTranspose _transpose_projection_weights{};
- CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _projection_reduction{};
- CLArithmeticAddition _projection_bias_add{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
- CLGEMMLowpOutputStage _input_to_forget_outstage{};
- CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
- CLGEMMLowpOutputStage _cell_to_forget_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_forget{};
- CLArithmeticAddition _accumulate_cell_forget{};
- CLActivationLayer _forget_gate_sigmoid{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
- CLGEMMLowpOutputStage _input_to_cell_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
- CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_modulation{};
- CLActivationLayer _cell_gate_tanh{};
- CLArithmeticSubtraction _input_gate_sub{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
- CLGEMMLowpOutputStage _input_to_input_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
- CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_input{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
- CLGEMMLowpOutputStage _cell_to_input_outstage{};
- CLArithmeticAddition _accumulate_cell_input{};
- CLActivationLayer _input_gate_sigmoid{};
- CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
- CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
- CLArithmeticAddition _add_forget_cell{};
- CLActivationLayer _cell_clip{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
- CLGEMMLowpOutputStage _input_to_output_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
- CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_output{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
- CLGEMMLowpOutputStage _cell_to_output_outstage{};
- CLArithmeticAddition _accumulate_cell_to_output{};
- CLActivationLayer _output_gate_sigmoid{};
- CLActivationLayer _hidden_tanh{};
- CLPixelWiseMultiplication _pixelwise_mul_hidden{};
- CLGEMMLowpOutputStage _hidden_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_projection{};
- CLGEMMLowpOutputStage _projection_outstage{};
- CLArithmeticAddition _accumulate_projection{};
- CLActivationLayer _projection_clip{};
- std::array<CLQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
- CLCopyKernel _copy_output{};
+ CLTranspose _transpose_input_to_forget_weights{};
+ CLTranspose _transpose_input_to_cell_weights{};
+ CLTranspose _transpose_input_to_output_weights{};
+ CLTranspose _transpose_input_to_input_weights{};
+ CLTranspose _transpose_recurrent_to_forget_weights{};
+ CLTranspose _transpose_recurrent_to_cell_weights{};
+ CLTranspose _transpose_recurrent_to_output_weights{};
+ CLTranspose _transpose_recurrent_to_input_weights{};
+ CLTranspose _transpose_projection_weights{};
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
+ CLArithmeticAddition _projection_bias_add{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
+ CLGEMMLowpOutputStage _input_to_forget_outstage{};
+ CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
+ CLGEMMLowpOutputStage _cell_to_forget_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_forget{};
+ CLArithmeticAddition _accumulate_cell_forget{};
+ CLActivationLayer _forget_gate_sigmoid{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
+ CLGEMMLowpOutputStage _input_to_cell_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
+ CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_modulation{};
+ CLActivationLayer _cell_gate_tanh{};
+ CLArithmeticSubtraction _input_gate_sub{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
+ CLGEMMLowpOutputStage _input_to_input_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
+ CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_input{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
+ CLGEMMLowpOutputStage _cell_to_input_outstage{};
+ CLArithmeticAddition _accumulate_cell_input{};
+ CLActivationLayer _input_gate_sigmoid{};
+ CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
+ CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
+ CLArithmeticAddition _add_forget_cell{};
+ CLActivationLayer _cell_clip{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
+ CLGEMMLowpOutputStage _input_to_output_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
+ CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_output{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
+ CLGEMMLowpOutputStage _cell_to_output_outstage{};
+ CLArithmeticAddition _accumulate_cell_to_output{};
+ CLActivationLayer _output_gate_sigmoid{};
+ CLActivationLayer _hidden_tanh{};
+ CLPixelWiseMultiplication _pixelwise_mul_hidden{};
+ CLGEMMLowpOutputStage _hidden_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_projection{};
+ CLGEMMLowpOutputStage _projection_outstage{};
+ CLArithmeticAddition _accumulate_projection{};
+ CLActivationLayer _projection_clip{};
+ std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
+ std::unique_ptr<CLCopyKernel> _copy_output;
TensorCopyKernel _projection_bias_copy{};
TensorCopyKernel _projection_output_to_accumulate_copy{};
@@ -402,30 +406,11 @@ private:
inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
{
- return _layer_norms[getGateIndex(g)];
+ return *_layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in)
- {
- ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
-
- CLTensor *out = &get_layer_norm_output(g);
- _memory_group.manage(out);
- out->allocator()->init(*(in->info()));
-
- get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
- }
-
- inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
- {
- // Output quantization scale will be different, but ignored here
- // since it will be configured at configure() stage.
- const TensorInfo out
- {
- in
- };
- return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
- }
+ inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
+ inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
CLTensor _input_to_forget_weights_transposed{ nullptr };
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index e045adf5fd..a0a27c5cb4 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H
#define ARM_COMPUTE_CLQUANTIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a quantization layer. This function calls the following CL kernels:
*
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 9d1cb1a724..ff3fb5449b 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -24,15 +24,17 @@
#ifndef ARM_COMPUTE_CLRNN_LAYER_H
#define ARM_COMPUTE_CLRNN_LAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCopyKernel;
class ICLTensor;
/** Basic function to run @ref CLRNNLayer */
@@ -41,6 +43,12 @@ class CLRNNLayer : public IFunction
public:
/** Default constructor */
CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer(const CLRNNLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer &operator=(const CLRNNLayer &) = delete;
+ /** Default destructor */
+ ~CLRNNLayer();
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
@@ -85,16 +93,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLArithmeticAddition _add_kernel;
- CLActivationLayer _activation;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _gemm_state_f;
+ CLArithmeticAddition _add_kernel;
+ CLActivationLayer _activation;
+ CLFullyConnectedLayer _fully_connected_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ CLTensor _fully_connected_out;
+ CLTensor _gemm_output;
+ CLTensor _add_output;
+ bool _is_prepared;
};
}
#endif /* ARM_COMPUTE_CLRNN_LAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
index 2e78f16d6b..b4cd5560ef 100644
--- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
@@ -25,12 +25,14 @@
#define ARM_COMPUTE_CLROIALIGNLAYER_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
+class ITensorInfo;
/** Basic function to run @ref CLROIAlignLayerKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
index 30139274be..836575ef68 100644
--- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
@@ -24,14 +24,14 @@
#ifndef ARM_COMPUTE_CLROIPOOLINGLAYER_H
#define ARM_COMPUTE_CLROIPOOLINGLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
/** Basic function to run @ref CLROIPoolingLayerKernel.
*
diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h
index a86cfb605d..e11e740861 100644
--- a/arm_compute/runtime/CL/functions/CLRange.h
+++ b/arm_compute/runtime/CL/functions/CLRange.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLRangeKernel
*
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 5d050d71d6..3fbcee6c21 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLREDUCTIONOPERATION_H
#define ARM_COMPUTE_CLREDUCTIONOPERATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,9 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLReductionOperationKernel;
class ICLTensor;
/** Perform reduction operation.
@@ -49,6 +50,16 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLReductionOperation();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation(const CLReductionOperation &) = delete;
+ /** Default move constructor */
+ CLReductionOperation(CLReductionOperation &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation &operator=(const CLReductionOperation &) = delete;
+ /** Default move assignment operator */
+ CLReductionOperation &operator=(CLReductionOperation &&) = default;
/** Set the input and output tensors.
*
@@ -88,15 +99,15 @@ public:
private:
ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
- std::vector<CLFillBorderKernel> _border_handlers_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
- bool _is_serial;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _border_handlers_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+ bool _is_serial;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */ \ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
index 5b110d58f4..bf5d348b3b 100644
--- a/arm_compute/runtime/CL/functions/CLRemap.h
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute remap. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h
index a7287ce266..0840fd13fd 100644
--- a/arm_compute/runtime/CL/functions/CLReorgLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLReorgLayer : public ICLSimpleFunction
{
diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 7fc6c3b864..b4d52ec8cf 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
@@ -29,7 +29,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReshapeLayerKernel */
class CLReshapeLayer : public IFunction
diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h
index 6b140920e9..81fa04b1f5 100644
--- a/arm_compute/runtime/CL/functions/CLReverse.h
+++ b/arm_compute/runtime/CL/functions/CLReverse.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLREVERSE_H
#define ARM_COMPUTE_CLREVERSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReverseKernel */
class CLReverse : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index d776e83035..360d63ea22 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -32,7 +32,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLScaleKernel */
class CLScale : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
index 3892874f35..19c860f39b 100644
--- a/arm_compute/runtime/CL/functions/CLScharr3x3.h
+++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels:
diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h
index a1af922303..7fd52312fb 100644
--- a/arm_compute/runtime/CL/functions/CLSelect.h
+++ b/arm_compute/runtime/CL/functions/CLSelect.h
@@ -24,14 +24,15 @@
#ifndef ARM_COMPUTE_CLSELECT_H
#define ARM_COMPUTE_CLSELECT_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSelect */
class CLSelect : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index 23c398cb41..f17e77236d 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -31,6 +31,8 @@ namespace arm_compute
{
// Forward Declarations
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
index 25d4ed6895..492900da11 100644
--- a/arm_compute/runtime/CL/functions/CLSobel3x3.h
+++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels:
@@ -42,6 +43,14 @@ class ICLTensor;
class CLSobel3x3 : public ICLSimpleFunction
{
public:
+ /** Default Constructor */
+ CLSobel3x3() = default;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3(const CLSobel3x3 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3 &operator=(const CLSobel3x3 &) = delete;
+ /** Default destructor */
+ ~CLSobel3x3();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
index 1f91c46f7f..a00fdd72b8 100644
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLSOBEL5X5_H
#define ARM_COMPUTE_CLSOBEL5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel5x5HorKernel;
+class CLSobel5x5VertKernel;
class ICLTensor;
/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels:
@@ -54,6 +56,12 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5(const CLSobel5x5 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5 &operator=(const CLSobel5x5 &) = delete;
+ /** Default destructor */
+ ~CLSobel5x5();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -82,12 +90,12 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
+ std::unique_ptr<CLSobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
index 91daf64c29..01a863b11b 100644
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLSOBEL7X7_H
#define ARM_COMPUTE_CLSOBEL7X7_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -37,6 +35,10 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel7x7HorKernel;
+class CLSobel7x7VertKernel;
class ICLTensor;
/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels:
@@ -54,6 +56,12 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7(const CLSobel7x7 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7 &operator=(const CLSobel7x7 &) = delete;
+ /** Default destructor */
+ ~CLSobel7x7();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -82,12 +90,12 @@ public:
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
+ std::unique_ptr<CLSobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL7X7_H */
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index fd71f3ed4d..ab10a64de4 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H
#define ARM_COMPUTE_CLSOFTMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
@@ -35,7 +34,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLLogits1DMaxShiftExpSumKernel;
+class CLLogits1DNormKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a SoftmaxLayer.
*
@@ -57,6 +60,16 @@ class CLSoftmaxLayerGeneric : public IFunction
public:
/** Constructor */
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete;
+ /** Default destructor */
+ ~CLSoftmaxLayerGeneric();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
@@ -92,17 +105,17 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLPermute _permute_input;
- CLPermute _permute_output;
- CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;
- CLLogits1DNormKernel _norm_kernel;
- CLTensor _max;
- CLTensor _sum;
- CLTensor _tmp;
- CLTensor _input_permuted;
- CLTensor _output_permuted;
- bool _needs_permute;
+ MemoryGroup _memory_group;
+ CLPermute _permute_input;
+ CLPermute _permute_output;
+ std::unique_ptr<CLLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
+ std::unique_ptr<CLLogits1DNormKernel> _norm_kernel;
+ CLTensor _max;
+ CLTensor _sum;
+ CLTensor _tmp;
+ CLTensor _input_permuted;
+ CLTensor _output_permuted;
+ bool _needs_permute;
};
using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>;
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
index c6f7f11079..1611aa8ed4 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
@@ -24,16 +24,19 @@
#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYER_H
#define ARM_COMPUTE_CLSPACETOBATCHLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLMemsetKernel;
+class CLSpaceToBatchLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions:
*
@@ -54,7 +57,7 @@ public:
/** Allow instances of this class to be moved */
CLSpaceToBatchLayer &operator=(CLSpaceToBatchLayer &&) = default;
/** Default destructor */
- virtual ~CLSpaceToBatchLayer() = default;
+ ~CLSpaceToBatchLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -121,9 +124,9 @@ public:
void run() override;
private:
- CLSpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- CLMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
+ std::unique_ptr<CLSpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+ std::unique_ptr<CLMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+ bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
index 24830cf4d3..9e476fe7bd 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
@@ -24,14 +24,17 @@
#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYER_H
#define ARM_COMPUTE_CLSPACETODEPTHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLSpaceToDepthLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSpaceToDepthLayerKernel. */
class CLSpaceToDepthLayer : public IFunction
@@ -39,6 +42,16 @@ class CLSpaceToDepthLayer : public IFunction
public:
/** Default constructor */
CLSpaceToDepthLayer();
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer &operator=(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLSpaceToDepthLayer(CLSpaceToDepthLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLSpaceToDepthLayer &operator=(CLSpaceToDepthLayer &&) = delete;
+ /** Default destructor */
+ ~CLSpaceToDepthLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -68,7 +81,7 @@ public:
void run() override;
private:
- CLSpaceToDepthLayerKernel _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
+ std::unique_ptr<CLSpaceToDepthLayerKernel> _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h
index 95875962c8..3861fd299a 100644
--- a/arm_compute/runtime/CL/functions/CLStackLayer.h
+++ b/arm_compute/runtime/CL/functions/CLStackLayer.h
@@ -27,14 +27,15 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-
#include <memory>
#include <vector>
namespace arm_compute
{
+class CLCompileContext;
+class CLStackLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to stack tensors along an axis. This function calls the following kernel:
*
@@ -46,6 +47,16 @@ class CLStackLayer : public IFunction
public:
/** Default constructor */
CLStackLayer();
+ /** Prevent instances of this class from being copied */
+ CLStackLayer(const CLStackLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLStackLayer &operator=(const CLStackLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLStackLayer(CLStackLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLStackLayer &operator=(CLStackLayer &&) = delete;
+ /** Default destructor */
+ ~CLStackLayer();
/** Initialise the kernel's inputs vector and output.
*
* @note Supported input tensor rank: up to 4
@@ -84,9 +95,9 @@ public:
void run() override;
private:
- std::vector<ICLTensor *> _input;
- std::vector<CLStackLayerKernel> _stack_kernels;
- unsigned int _num_inputs;
+ std::vector<ICLTensor *> _input;
+ std::vector<std::unique_ptr<CLStackLayerKernel>> _stack_kernels;
+ unsigned int _num_inputs;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSTACKLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h
index 32d4b7bdf9..ca59309548 100644
--- a/arm_compute/runtime/CL/functions/CLTableLookup.h
+++ b/arm_compute/runtime/CL/functions/CLTableLookup.h
@@ -28,6 +28,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
class ICLLut;
diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h
index f3af122f0a..2c9213bd01 100644
--- a/arm_compute/runtime/CL/functions/CLThreshold.h
+++ b/arm_compute/runtime/CL/functions/CLThreshold.h
@@ -33,6 +33,7 @@
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLThresholdKernel */
diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h
index d2f1e9730c..69743693ff 100644
--- a/arm_compute/runtime/CL/functions/CLTile.h
+++ b/arm_compute/runtime/CL/functions/CLTile.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLTILE_H
#define ARM_COMPUTE_CLTILE_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLTileKernel */
class CLTile : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 9ba7cafce4..2b7a03f23f 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -24,11 +24,14 @@
#ifndef ARM_COMPUTE_CLTRANSPOSE_H
#define ARM_COMPUTE_CLTRANSPOSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
*
diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
index 07b4c8aecb..88b293069d 100644
--- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
@@ -26,13 +26,17 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLUpsampleLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLUpsampleLayerKernel */
class CLUpsampleLayer : public IFunction
@@ -49,7 +53,7 @@ public:
/** Allow instances of this class to be moved */
CLUpsampleLayer &operator=(CLUpsampleLayer &&) = default;
/** Default destructor */
- virtual ~CLUpsampleLayer() = default;
+ ~CLUpsampleLayer();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
@@ -86,8 +90,8 @@ public:
void run() override;
private:
- CLUpsampleLayerKernel _upsample;
- ICLTensor *_output;
+ std::unique_ptr<CLUpsampleLayerKernel> _upsample;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLUPSAMPLELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
index eb7c05be84..153e9bfdfc 100644
--- a/arm_compute/runtime/CL/functions/CLWarpAffine.h
+++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */
diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
index 2a1f78093e..5c8b5425a4 100644
--- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h
+++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
@@ -31,6 +31,7 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 602f644230..9ced69c1bb 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
@@ -33,7 +31,11 @@
namespace arm_compute
{
+class CLCompileContext;
+class CLWinogradFilterTransformKernel;
+class CLWinogradOutputTransformKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
*
@@ -56,6 +58,8 @@ public:
CLWinogradConvolutionLayer &operator=(const CLWinogradConvolutionLayer &) = delete;
/** Default move assignment operator */
CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLWinogradConvolutionLayer();
/** Set the input and output tensors.
*
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
@@ -122,16 +126,16 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _batched_mm;
- CLWinogradInputTransform _input_transform;
- CLWinogradFilterTransformKernel _filter_transform;
- CLWinogradOutputTransformKernel _output_transform;
- CLTensor _input0;
- CLTensor _input1;
- CLTensor _batched_mm_output;
- const ICLTensor *_original_weights;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _batched_mm;
+ CLWinogradInputTransform _input_transform;
+ std::unique_ptr<CLWinogradFilterTransformKernel> _filter_transform;
+ std::unique_ptr<CLWinogradOutputTransformKernel> _output_transform;
+ CLTensor _input0;
+ CLTensor _input1;
+ CLTensor _batched_mm_output;
+ const ICLTensor *_original_weights;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 351f88012f..8cd809cc1f 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -31,7 +31,9 @@
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute a @ref CLWinogradInputTransformKernel. */
class CLWinogradInputTransform : public ICLSimpleFunction
diff --git a/arm_compute/runtime/CL/functions/CLYOLOLayer.h b/arm_compute/runtime/CL/functions/CLYOLOLayer.h
index 3e403f44bd..48ee4ea4f7 100644
--- a/arm_compute/runtime/CL/functions/CLYOLOLayer.h
+++ b/arm_compute/runtime/CL/functions/CLYOLOLayer.h
@@ -24,13 +24,14 @@
#ifndef ARM_COMPUTE_CLYOLOLAYER_H
#define ARM_COMPUTE_CLYOLOLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLYOLOLayerKernel that performs a partial activation on the input
*
diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h
index 0097383115..fd285160e9 100644
--- a/arm_compute/runtime/IOperator.h
+++ b/arm_compute/runtime/IOperator.h
@@ -24,14 +24,13 @@
#ifndef ARM_COMPUTE_IOPERATOR_H
#define ARM_COMPUTE_IOPERATOR_H
-#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
+class ITensorPack;
namespace experimental
{
/** Base class for all async functions */