aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h')
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h49
1 files changed, 23 insertions, 26 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index e2c5d683cf..1af9e1dc6f 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -35,8 +35,8 @@ namespace arm_compute
class CLCompileContext;
class CLFillBorderKernel;
class CLDepthwiseConvolutionLayerNativeKernel;
-class CLDepthwiseConvolutionLayerReshapeWeightsKernel;
-class ICLDepthwiseConvolutionLayer3x3Kernel;
+class CLDepthwiseConvolutionLayer3x3NCHWKernel;
+class CLDepthwiseConvolutionLayer3x3NHWCKernel;
class ICLTensor;
/** Function to execute a depthwise convolution
@@ -123,19 +123,17 @@ private:
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
*
* @return a Depthwise Convolution Function
*/
static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
*
* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
- * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
*
*/
@@ -200,7 +198,7 @@ private:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overriden:
void run() override;
@@ -212,26 +210,25 @@ private:
};
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- std::unique_ptr<CLFillBorderKernel> _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- CLTensor _output_multipliers;
- CLTensor _output_shifts;
- const ITensor *_original_weights;
- const ITensor *_input;
- const ITensor *_output;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_nhwc;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw;
+ std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ CLTensor _output_multipliers;
+ CLTensor _output_shifts;
+ const ITensor *_original_weights;
+ const ITensor *_input;
+ const ITensor *_output;
+ bool _needs_permute;
+ bool _is_prepared;
+ bool _is_quantized;
+ bool _is_nhwc;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: