From dcf4c87cf78a5f1667699c1a3511d09356938660 Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Fri, 16 Apr 2021 12:41:45 +0100
Subject: CLDepthwiseConvolutionLayer rework - Part 1

Remove the reshaped variant for CLDepthwiseConvolutionLayer 3x3 NHWC Quantized

- Remove kernel selection by GPUTarget
- Remove unused quantized support from the NHWC kernel
- Remove CLDepthwiseConvolutionLayerReshapeWeightsKernel
- Remove OpenCL kernels for reshaped dwc 3x3 quantized and weights reshape
- Remove the "_bifrost" suffix in common OpenCL kernel
- Remove the ICLDepthwiseConvolutionLayer3x3Kernel common interface

Resolve COMPMID-3864, COMPMID-3907

Change-Id: Icfac0fb6c00e214985beb05dad7c0cdbbee7d830
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5447
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h     | 103 ++++++++++-----------
 1 file changed, 50 insertions(+), 53 deletions(-)

(limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h')

diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index ce0bf5ceb3..ee47d98807 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
 #define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
 
-#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "src/core/CL/ICLKernel.h"
 
 namespace arm_compute
 {
@@ -32,81 +32,78 @@ class ICLTensor;
 
 /** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC.
  */
-class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
+class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLKernel
 {
 public:
     /** Default constructor */
     CLDepthwiseConvolutionLayer3x3NHWCKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3NHWCKernel(const CLDepthwiseConvolutionLayer3x3NHWCKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3NHWCKernel &operator=(const CLDepthwiseConvolutionLayer3x3NHWCKernel &) = delete;
+    /** Default move constructor. */
+    CLDepthwiseConvolutionLayer3x3NHWCKernel(CLDepthwiseConvolutionLayer3x3NHWCKernel &&) = default;
+    /** Default move assignment operator. */
+    CLDepthwiseConvolutionLayer3x3NHWCKernel &operator=(CLDepthwiseConvolutionLayer3x3NHWCKernel &&) = default;
     /** Default move assignment operator. */
     /** Initialize the function's source, destination, conv and border_size.
      *
-     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
-     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
-     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
-     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info          Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
-     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
-     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
-     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
-     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  input            Source tensor. DataType supported: F16/F32.
+     * @param[in]  weights          Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+     *                              Data type supported: Same as @p input.
+     * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                              Data type supported: Same as @p input.
+     * @param[out] output           Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info        Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
-                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
-                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
     /** Initialize the function's source, destination, conv and border_size.
      *
-     * @param[in]  compile_context    The compile context to be used.
-     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
-     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
-     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
-     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info          Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
-     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
-     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
-     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
-     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. DataType supported: F16/F32.
+     * @param[in]  weights          Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+     *                              Data type supported: Same as @p input.
+     * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                              Data type supported: Same as @p input.
+     * @param[out] output           Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info        Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
      */
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
-                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
-                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
      *
-     * @param[in] input              Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] weights            Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
-     *                               Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
-     * @param[in] biases             Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                               Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
-     * @param[in] output             Destination tensor info. Data type supported: Same as @p input.
-     * @param[in] conv_info          Padding and stride information to use for the convolution.
-     * @param[in] depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in] act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
-     * @param[in] dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
-     *                               the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
-     * @param[in] output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
-     *                               the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in] input            Source tensor info. DataType supported: F16/F32.
+     * @param[in] weights          Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
+     *                             Data type supported: Same as @p input.
+     * @param[in] biases           Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                             Data type supported: Same as @p input.
+     * @param[in] output           Destination tensor info. Data type supported: Same as @p input.
+     * @param[in] conv_info        Padding and stride information to use for the convolution.
+     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                           unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
-                           const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+                           unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
-    BorderSize border_size() const override;
 
 private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    const ICLTensor *_weights;
+    const ICLTensor *_biases;
+
     unsigned int _num_planes_processed_per_iteration;
 };
 } // namespace arm_compute
-- 
cgit v1.2.1