about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-12-07 18:31:47 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2018-12-13 10:42:12 +0000
commit: 05045c1e052dbba4e44bf0bb8ead3e9b5220d04e (patch)
tree: e17a64e9cd0f0927bd75f540b6aeb55ba24953d4 /arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
parent: 35767bc09f21050a9767a91b086b327afc928a81 (diff)
download: ComputeLibrary-05045c1e052dbba4e44bf0bb8ead3e9b5220d04e.tar.gz
COMPMID-1071: (3RDPARTY_UPDATE) Add depth multiplier on DepthwiseConv 3x3 NHWC
Change-Id: I316ff40dda379d4b84fac5d63f0c56efbacbc2b4
Reviewed-on: https://review.mlplatform.org/371
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h')
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h 25
1 files changed, 23 insertions, 2 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 4863101ccf..60dddbb853 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -34,8 +34,10 @@
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
@@ -53,7 +55,15 @@ class CLDepthwiseConvolutionLayer3x3 : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer3x3();
+ CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ /** Default move constructor */
+ CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
@@ -86,10 +96,21 @@ public:
ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD);
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
+ CLMemoryGroup _memory_group;
std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
CLFillBorderKernel _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+ bool _needs_permute;
+ bool _is_prepared;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
@@ -166,5 +187,5 @@ private:
const ICLTensor *_original_weights;
std::unique_ptr<IFunction> _optimised_function;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */