aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h')
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h25
1 files changed, 23 insertions, 2 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 4863101ccf..60dddbb853 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -34,8 +34,10 @@
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
@@ -53,7 +55,15 @@ class CLDepthwiseConvolutionLayer3x3 : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer3x3();
+ CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ /** Default move constructor */
+ CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
@@ -86,10 +96,21 @@ public:
ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD);
// Inherited methods overriden:
void run() override;
+ void prepare() override;
private:
+ CLMemoryGroup _memory_group;
std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
CLFillBorderKernel _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+ bool _needs_permute;
+ bool _is_prepared;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
@@ -166,5 +187,5 @@ private:
const ICLTensor *_original_weights;
std::unique_ptr<IFunction> _optimised_function;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */