From 8a94e7cec7b09a417a278425e2b56e7af5bf45d9 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 15 Sep 2017 19:06:47 +0100 Subject: COMPMID-534: Add MemoryManager support in OpenCL functions Adds support for: -CLConvolution -CLGEMM -CLGEMMLowp -CLHOGDescriptor -CLHOGGradient -CLHOGMultiDetection -CLL2Normalize -CLLocallyConnectedLayer -CLOpticalFlow -CLReductionOperation Change-Id: Ib13354d274ccf32ae933f3fbbad3ac3896cfd3bd Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87938 Tested-by: Kaizen Reviewed-by: Pablo Tello --- arm_compute/runtime/CL/functions/CLConvolution.h | 6 +++++- arm_compute/runtime/CL/functions/CLGEMM.h | 7 ++++++- arm_compute/runtime/CL/functions/CLGEMMLowp.h | 12 ++++++++---- arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 7 ++++++- arm_compute/runtime/CL/functions/CLHOGGradient.h | 6 +++++- arm_compute/runtime/CL/functions/CLHOGMultiDetection.h | 7 ++++++- arm_compute/runtime/CL/functions/CLL2Normalize.h | 6 +++++- arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h | 7 ++++++- arm_compute/runtime/CL/functions/CLOpticalFlow.h | 5 ++++- arm_compute/runtime/CL/functions/CLReductionOperation.h | 5 ++++- 10 files changed, 55 insertions(+), 13 deletions(-) (limited to 'arm_compute/runtime/CL') diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h index f526f6ff4a..bc05cb2a85 100644 --- a/arm_compute/runtime/CL/functions/CLConvolution.h +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -27,11 +27,14 @@ #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include <memory> namespace arm_compute { @@ -70,7 +73,7 @@ class 
CLConvolutionSquare : public IFunction { public: /** Default constructor */ - CLConvolutionSquare(); + CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -86,6 +89,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; /**< Function's memory group */ CLTensor _tmp; /**< temporary buffer for output of horizontal pass */ bool _is_separable; /**< true if the convolution can be separated */ CLSeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 9b887305cb..2765b77b7d 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -29,8 +29,12 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { @@ -48,7 +52,7 @@ class CLGEMM : public IFunction { public: /** Default constructor. */ - CLGEMM(); + CLGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialise the kernel's inputs and output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. 
@@ -70,6 +74,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLGEMMInterleave4x4Kernel _interleave_kernel; CLGEMMTranspose1xWKernel _transpose_kernel; CLGEMMMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h index da8883c3f8..613fcaa7e0 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowp.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h @@ -25,12 +25,15 @@ #define __ARM_COMPUTE_CLGEMMLOWP_H__ #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { @@ -47,7 +50,7 @@ class CLGEMMLowp : public IFunction { public: /** Constructor */ - CLGEMMLowp(); + CLGEMMLowp(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialise the kernel's inputs, output * * @note GEMM_LOWP: low precision matrix multiply kernel @@ -75,6 +78,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLGEMMInterleave4x4Kernel _interleave_kernel; CLGEMMTranspose1xWKernel _transpose_kernel; CLGEMMLowpMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h index cdb23bff33..00d64f109f 100644 --- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -26,9 +26,13 @@ #include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" 
#include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLHOGGradient.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { @@ -44,7 +48,7 @@ class CLHOGDescriptor : public IFunction { public: /** Default constructor */ - CLHOGDescriptor(); + CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialise the function's source, destination, HOG data-object and border mode * * @param[in, out] input Input tensor. Data type supported: U8 @@ -60,6 +64,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLHOGGradient _gradient; CLHOGOrientationBinningKernel _orient_bin; CLHOGBlockNormalizationKernel _block_norm; diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h index e74a68497f..051e5860d7 100644 --- a/arm_compute/runtime/CL/functions/CLHOGGradient.h +++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h @@ -28,11 +28,14 @@ #include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLDerivative.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include <memory> namespace arm_compute { @@ -46,7 +49,7 @@ class CLHOGGradient : public IFunction { public: /** Default constructor */ - CLHOGGradient(); + CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialise the function's source, destinations, phase type and border mode * * @param[in, out] input Input tensor. Data type supported: U8. 
@@ -63,6 +66,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLDerivative _derivative; CLMagnitudePhaseKernel _mag_phase; CLTensor _gx; diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h index 3fe0fa932a..1ff986511e 100644 --- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h +++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h @@ -28,10 +28,14 @@ #include "arm_compute/core/CL/ICLMultiHOG.h" #include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLHOGDetector.h" #include "arm_compute/runtime/CL/functions/CLHOGGradient.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { @@ -53,7 +57,7 @@ class CLHOGMultiDetection : public IFunction { public: /** Default constructor */ - CLHOGMultiDetection(); + CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLHOGMultiDetection(const CLHOGMultiDetection &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -85,6 +89,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLHOGGradient _gradient_kernel; std::unique_ptr _orient_bin_kernel; std::unique_ptr _block_norm_kernel; diff --git a/arm_compute/runtime/CL/functions/CLL2Normalize.h b/arm_compute/runtime/CL/functions/CLL2Normalize.h index 52c562c61b..20af54eda2 100644 --- a/arm_compute/runtime/CL/functions/CLL2Normalize.h +++ b/arm_compute/runtime/CL/functions/CLL2Normalize.h @@ -26,11 +26,14 @@ #include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h" #include 
"arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/CL/functions/CLReductionOperation.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include <memory> namespace arm_compute { @@ -42,7 +45,7 @@ class CLL2Normalize : public IFunction { public: /** Constructor */ - CLL2Normalize(); + CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * @@ -57,6 +60,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLReductionOperation _reduce_func; CLL2NormalizeKernel _normalize_kernel; CLTensor _sumsq; diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h index 5f4f1ba1d7..f56039f62a 100644 --- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -31,7 +31,11 @@ #include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { @@ -48,7 +52,7 @@ class CLLocallyConnectedLayer : public IFunction { public: /** Default constructor */ - CLLocallyConnectedLayer(); + CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -66,6 +70,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLIm2ColKernel _input_im2col_kernel; CLWeightsReshapeKernel _weights_reshape_kernel; CLLocallyConnectedMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h index ca3f86100e..94dda186bf 100644 --- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h +++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h @@ -29,9 +29,11 @@ #include "arm_compute/core/IArray.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLScharr3x3.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include #include @@ -57,7 +59,7 @@ class CLOpticalFlow : public IFunction { public: /** Default constructor */ - CLOpticalFlow(); + CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLOpticalFlow(const CLOpticalFlow &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -91,6 +93,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; std::unique_ptr _tracker_init_kernel; std::unique_ptr _tracker_stage0_kernel; std::unique_ptr _tracker_stage1_kernel; diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 89fdad2b24..09beabad8d 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -27,8 +27,10 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include 
"arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include #include @@ -44,7 +46,7 @@ class CLReductionOperation : public IFunction { public: /* Constructor */ - CLReductionOperation(); + CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * @@ -59,6 +61,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; std::vector _sums_vector{ nullptr }; std::unique_ptr _reduction_kernels_vector{ nullptr }; std::unique_ptr _border_handlers_vector{ nullptr }; -- cgit v1.2.1