From 05045c1e052dbba4e44bf0bb8ead3e9b5220d04e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 7 Dec 2018 18:31:47 +0000 Subject: COMPMID-1071: (3RDPARTY_UPDATE) Add depth multiplier on DepthwiseConv 3x3 NHWC Change-Id: I316ff40dda379d4b84fac5d63f0c56efbacbc2b4 Reviewed-on: https://review.mlplatform.org/371 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- .../CL/functions/CLDepthwiseConvolutionLayer.h | 25 ++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h') diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 4863101ccf..60dddbb853 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -34,8 +34,10 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" namespace arm_compute @@ -53,7 +55,15 @@ class CLDepthwiseConvolutionLayer3x3 : public IFunction { public: /** Default constructor */ - CLDepthwiseConvolutionLayer3x3(); + CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete; + /** Default move constructor */ + CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + 
CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete; + /** Default move assignment operator */ + CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default; /** Initialize the function's source, destination, conv and border_size. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). @@ -86,10 +96,21 @@ public: ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD); // Inherited methods overriden: void run() override; + void prepare() override; private: + CLMemoryGroup _memory_group; std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel; CLFillBorderKernel _border_handler; + CLPermute _permute_input_to_nchw; + CLPermute _permute_weights_to_nchw; + CLPermute _permute_output_to_nhwc; + CLTensor _permuted_input; + CLTensor _permuted_weights; + CLTensor _permuted_output; + const ITensor *_original_weights; + bool _needs_permute; + bool _is_prepared; }; /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: @@ -166,5 +187,5 @@ private: const ICLTensor *_original_weights; std::unique_ptr<IFunction> _optimised_function; }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */ -- cgit v1.2.1