aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2021-04-16 12:41:45 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-04-20 13:32:10 +0000
commit dcf4c87cf78a5f1667699c1a3511d09356938660 (patch)
tree 28aa191a226e4bf4350d622fcb668abaa17e8677 /src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
parent 562bee584f3633167725af7915f50d07b0597f10 (diff)
download ComputeLibrary-dcf4c87cf78a5f1667699c1a3511d09356938660.tar.gz
CLDepthwiseConvolutionLayer rework - Part 1
Remove the reshaped variant for CLDepthwiseConvolutionLayer 3x3 NHWC Quantized
- Remove kernel selection by GPUTarget
- Remove unused quantized support from the NHWC kernel
- Remove CLDepthwiseConvolutionLayerReshapeWeightsKernel
- Remove OpenCL kernels for reshaped dwc 3x3 quantized and weights reshape
- Remove the "_bifrost" suffix in common OpenCL kernel
- Remove the ICLDepthwiseConvolutionLayer3x3Kernel common interface

Resolve COMPMID-3864, COMPMID-3907

Change-Id: Icfac0fb6c00e214985beb05dad7c0cdbbee7d830
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5447
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h')
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h 31
1 files changed, 24 insertions, 7 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
index 45b5869676..c4e475f6f2 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
@@ -32,11 +32,19 @@ class ICLTensor;
/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW.
*/
-class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
+class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLKernel
{
public:
/** Default constructor */
CLDepthwiseConvolutionLayer3x3NCHWKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3NCHWKernel(const CLDepthwiseConvolutionLayer3x3NCHWKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer3x3NCHWKernel &operator=(const CLDepthwiseConvolutionLayer3x3NCHWKernel &) = delete;
+ /** Default Move Constructor. */
+ CLDepthwiseConvolutionLayer3x3NCHWKernel(CLDepthwiseConvolutionLayer3x3NCHWKernel &&) = default;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayer3x3NCHWKernel &operator=(CLDepthwiseConvolutionLayer3x3NCHWKernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
* @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -56,7 +64,7 @@ public:
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Initialize the function's source, destination, conv and border_size.
*
* @param[in] compile_context The compile context to be used.
@@ -77,7 +85,7 @@ public:
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
*
* @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -89,7 +97,6 @@ public:
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
@@ -99,13 +106,23 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(),
const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
void run(const Window &window, cl::CommandQueue &queue) override;
BorderSize border_size() const override;
private:
+ BorderSize _border_size;
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_weights;
+ const ICLTensor *_biases;
+ unsigned int _conv_stride_y;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _is_quantized;
+
unsigned int _conv_stride_x;
unsigned int _conv_pad_top;
unsigned int _conv_pad_left;