1 files changed, 35 insertions, 23 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
index 3b7fc7b7dc..a2f61ee058 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
@@ -39,35 +39,47 @@ public:
     CLDepthwiseConvolutionLayer3x3NCHWKernel();
     /** Initialize the function's source, destination, conv and border_size.
      *
-     * @param[in]  input            Source tensor. DataType supported: QASYMM8/F16/F32.
-     * @param[in]  weights          Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                              Data type supported: Same as @p input.
-     * @param[out] output           Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info        Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
-     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/F16/F32.
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+     *                                Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
      */
-    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
-                   ActivationLayerInfo act_info, const Size2D &dilation) override;
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
      *
-     * @param[in] input            Source tensor info. DataType supported: F16/F32/QASYMM8.
-     * @param[in] weights          Weights tensor info. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in] biases           Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                             Data type supported: Same as @p input.
-     * @param[in] output           Destination tensor. Data type supported: Same as @p input.
-     * @param[in] conv_info        Padding and stride information to use for the convolution.
-     * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
-     * @param[in] gpu_target       (Optional) GPU target to validate the kernel for. Defaults to midgard.
-     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] input              Source tensor info. DataType supported: F16/F32/QASYMM8.
+     * @param[in] weights            Weights tensor info. A 3D tensor with dimensions [3, 3, IFM].
+     *                               Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in] biases             Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                               Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[in] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in] conv_info          Padding and stride information to use for the convolution.
+     * @param[in] depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+     * @param[in] gpu_target         (Optional) GPU target to validate the kernel for. Defaults to midgard.
+     * @param[in] dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
+     *                               the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in] output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                               the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
-                           ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD,
+                           const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
 
     void run(const Window &window, cl::CommandQueue &queue) override;
     BorderSize border_size() const override;