COMPMID-2306: CLDepthwiseConvolution: support for QUANT8_PER_CHANNEL_SYMM

Change-Id: I18c886400daa2dcba0b91011bc4e503d807a4732
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2143
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-09 15:32:39 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-30 14:44:46 +0000
commit: df4cf57c7394265b27d051cb1cf0152c53659126 (patch)
tree: 87da5d6abeff65b2cee55b63f73bb268776af560 /arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
parent: 8b72199f25487040713d1668c998fdde3707413c (diff)
download: ComputeLibrary-df4cf57c7394265b27d051cb1cf0152c53659126.tar.gz
1 files changed, 38 insertions, 23 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 31ec871123..8e8df9c1f6 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -49,36 +49,48 @@ public:
     CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
     /** Initialize the function's source, destination and parameters
      *
-     * @param[in]  input            Source tensor. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
-     * @param[in]  weights          Weights tensor. A 3D tensor with dimensions [IFM, N, M]. Data type supported: Same as @p input.
-     * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                              Data type supported: Same as @p input.
-     * @param[out] output           Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
-     * @param[in]  dwc_info         Depthwise convolution layer info
-     * @param[in]  conv_info        Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  input              Source tensor. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, N, M].
+     *                                Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  dwc_weights_info   Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+     * @param[in]  dwc_info           Depthwise convolution layer info
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
      */
-    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info,
-                   const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U));
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
+                   const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
      *
-     * @param[in] input            Source tensor info. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
-     * @param[in] weights          Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. Data type supported: Same as @p input.
-     * @param[in] biases           Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                             Data type supported: Same as @p input.
-     * @param[in] output           Destination tensor info. Data type supported: Same as @p input.
-     * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
-     * @param[in] dwc_info         Depthwise convolution layer info
-     * @param[in] conv_info        Padding and stride information to use for the convolution.
-     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] input              Source tensor info. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
+     * @param[in] weights            Weights tensor info. A 3D tensor with dimensions [IFM, N, M].
+     *                               Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in] biases             Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                               Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[in] output             Destination tensor info. Data type supported: Same as @p input.
+     * @param[in] dwc_weights_info   Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+     * @param[in] dwc_info           Depthwise convolution layer info
+     * @param[in] conv_info          Padding and stride information to use for the convolution.
+     * @param[in] depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                               the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in] output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                               the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info,
-                           const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U));
+                           const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+                           const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -89,6 +101,9 @@ private:
     const ICLTensor *_biases;
     ICLTensor       *_output;
     unsigned int     _depth_multiplier;
+    const ICLTensor *_output_multipliers;
+    const ICLTensor *_output_shifts;
+    bool             _is_quantized;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H__ */
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-10-09 15:32:39 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-10-30 14:44:46 +0000
commit	df4cf57c7394265b27d051cb1cf0152c53659126 (patch)
tree	87da5d6abeff65b2cee55b63f73bb268776af560 /arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
parent	8b72199f25487040713d1668c998fdde3707413c (diff)
download	ComputeLibrary-df4cf57c7394265b27d051cb1cf0152c53659126.tar.gz