aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2017-09-15 19:06:47 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 8a94e7cec7b09a417a278425e2b56e7af5bf45d9 (patch)
tree: e952f39903d4624bbd6445c9cc6c7dbcc1114026 /arm_compute
parent: 658039bc4e06be34272eccf559a516a6b52f75f5 (diff)
download: ComputeLibrary-8a94e7cec7b09a417a278425e2b56e7af5bf45d9.tar.gz
COMPMID-534: Add MemoryManager support in OpenCL functions
Adds support for:
 - CLConvolution
 - CLGEMM
 - CLGEMMLowp
 - CLHOGDescriptor
 - CLHOGGradient
 - CLHOGMultiDetection
 - CLL2Normalize
 - CLLocallyConnectedLayer
 - CLOpticalFlow
 - CLReductionOperation

Change-Id: Ib13354d274ccf32ae933f3fbbad3ac3896cfd3bd
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87938
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/runtime/CL/functions/CLConvolution.h 6
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMM.h 7
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMMLowp.h 12
-rw-r--r-- arm_compute/runtime/CL/functions/CLHOGDescriptor.h 7
-rw-r--r-- arm_compute/runtime/CL/functions/CLHOGGradient.h 6
-rw-r--r-- arm_compute/runtime/CL/functions/CLHOGMultiDetection.h 7
-rw-r--r-- arm_compute/runtime/CL/functions/CLL2Normalize.h 6
-rw-r--r-- arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h 7
-rw-r--r-- arm_compute/runtime/CL/functions/CLOpticalFlow.h 5
-rw-r--r-- arm_compute/runtime/CL/functions/CLReductionOperation.h 5
10 files changed, 55 insertions, 13 deletions
diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
index f526f6ff4a..bc05cb2a85 100644
--- a/arm_compute/runtime/CL/functions/CLConvolution.h
+++ b/arm_compute/runtime/CL/functions/CLConvolution.h
@@ -27,11 +27,14 @@
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -70,7 +73,7 @@ class CLConvolutionSquare : public IFunction
{
public:
/** Default constructor */
- CLConvolutionSquare();
+ CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -86,6 +89,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
bool _is_separable; /**< true if the convolution can be separated */
CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 9b887305cb..2765b77b7d 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -29,8 +29,12 @@
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
@@ -48,7 +52,7 @@ class CLGEMM : public IFunction
{
public:
/** Default constructor. */
- CLGEMM();
+ CLGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the kernel's inputs and output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -70,6 +74,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLGEMMInterleave4x4Kernel _interleave_kernel;
CLGEMMTranspose1xWKernel _transpose_kernel;
CLGEMMMatrixMultiplyKernel _mm_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h
index da8883c3f8..613fcaa7e0 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowp.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h
@@ -25,12 +25,15 @@
#define __ARM_COMPUTE_CLGEMMLOWP_H__
#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
@@ -47,7 +50,7 @@ class CLGEMMLowp : public IFunction
{
public:
/** Constructor */
- CLGEMMLowp();
+ CLGEMMLowp(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the kernel's inputs, output
*
* @note GEMM_LOWP: low precision matrix multiply kernel
@@ -75,6 +78,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLGEMMInterleave4x4Kernel _interleave_kernel;
CLGEMMTranspose1xWKernel _transpose_kernel;
CLGEMMLowpMatrixMultiplyKernel _mm_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
index cdb23bff33..00d64f109f 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -26,9 +26,13 @@
#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
@@ -44,7 +48,7 @@ class CLHOGDescriptor : public IFunction
{
public:
/** Default constructor */
- CLHOGDescriptor();
+ CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destination, HOG data-object and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8
@@ -60,6 +64,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLHOGGradient _gradient;
CLHOGOrientationBinningKernel _orient_bin;
CLHOGBlockNormalizationKernel _block_norm;
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
index e74a68497f..051e5860d7 100644
--- a/arm_compute/runtime/CL/functions/CLHOGGradient.h
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -28,11 +28,14 @@
#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -46,7 +49,7 @@ class CLHOGGradient : public IFunction
{
public:
/** Default constructor */
- CLHOGGradient();
+ CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destinations, phase type and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8.
@@ -63,6 +66,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLDerivative _derivative;
CLMagnitudePhaseKernel _mag_phase;
CLTensor _gx;
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
index 3fe0fa932a..1ff986511e 100644
--- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -28,10 +28,14 @@
#include "arm_compute/core/CL/ICLMultiHOG.h"
#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
@@ -53,7 +57,7 @@ class CLHOGMultiDetection : public IFunction
{
public:
/** Default constructor */
- CLHOGMultiDetection();
+ CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -85,6 +89,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLHOGGradient _gradient_kernel;
std::unique_ptr<CLHOGOrientationBinningKernel[]> _orient_bin_kernel;
std::unique_ptr<CLHOGBlockNormalizationKernel[]> _block_norm_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLL2Normalize.h b/arm_compute/runtime/CL/functions/CLL2Normalize.h
index 52c562c61b..20af54eda2 100644
--- a/arm_compute/runtime/CL/functions/CLL2Normalize.h
+++ b/arm_compute/runtime/CL/functions/CLL2Normalize.h
@@ -26,11 +26,14 @@
#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -42,7 +45,7 @@ class CLL2Normalize : public IFunction
{
public:
/** Constructor */
- CLL2Normalize();
+ CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
@@ -57,6 +60,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLReductionOperation _reduce_func;
CLL2NormalizeKernel _normalize_kernel;
CLTensor _sumsq;
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index 5f4f1ba1d7..f56039f62a 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -31,7 +31,11 @@
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
@@ -48,7 +52,7 @@ class CLLocallyConnectedLayer : public IFunction
{
public:
/** Default constructor */
- CLLocallyConnectedLayer();
+ CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -66,6 +70,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLIm2ColKernel _input_im2col_kernel;
CLWeightsReshapeKernel _weights_reshape_kernel;
CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
index ca3f86100e..94dda186bf 100644
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -29,9 +29,11 @@
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstddef>
#include <cstdint>
@@ -57,7 +59,7 @@ class CLOpticalFlow : public IFunction
{
public:
/** Default constructor */
- CLOpticalFlow();
+ CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLOpticalFlow(const CLOpticalFlow &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -91,6 +93,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
std::unique_ptr<CLLKTrackerInitKernel[]> _tracker_init_kernel;
std::unique_ptr<CLLKTrackerStage0Kernel[]> _tracker_stage0_kernel;
std::unique_ptr<CLLKTrackerStage1Kernel[]> _tracker_stage1_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 89fdad2b24..09beabad8d 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -27,8 +27,10 @@
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
#include <memory>
@@ -44,7 +46,7 @@ class CLReductionOperation : public IFunction
{
public:
/* Constructor */
- CLReductionOperation();
+ CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
@@ -59,6 +61,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
std::vector<CLTensor *> _sums_vector{ nullptr };
std::unique_ptr<CLReductionOperationKernel[]> _reduction_kernels_vector{ nullptr };
std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector{ nullptr };