aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core')
-rw-r--r--arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h9
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h63
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h10
-rw-r--r--arm_compute/core/Types.h1
-rw-r--r--arm_compute/core/utils/quantization/AsymmHelpers.h14
6 files changed, 60 insertions, 41 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
index 7475d8d41d..cce7b69a0e 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -41,6 +41,7 @@ public:
*
* Valid conversions Input -> Output :
*
+ * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
* - U8 -> S8, U16, S16, U32, S32, F16, F32
* - U16 -> U8, S8, S16, U32, S32, F16, F32
* - S16 -> U8, S8, U16, U32, S32, F16, F32
@@ -49,16 +50,16 @@ public:
* - F16 -> U8, S8, U16, S16, U32, F32
* - F32 -> U8, S8, U16, S16, U32, F16
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*/
void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
index de06c88d5c..301c67331e 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,39 +51,47 @@ public:
CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
*/
void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
- const GEMMLowpOutputStageInfo &output_stage);
+ const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
*
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 or QASYMM8 if output_stage != NONE
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 or QASYMM8 if output_stage != NONE
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output Output tensor. Data type supported: QASYMM8.
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
*
* @return a status
*/
static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
- int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage);
+ int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -94,6 +102,9 @@ private:
const ICLTensor *_vector_sum_row;
const ICLTensor *_bias;
ICLTensor *_output;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _is_quantized_per_channel;
};
} // namespace arm_compute
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
index 26ab210b21..937f6a9b89 100644
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
@@ -48,7 +48,7 @@ public:
CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
* information to reshape the input tensor. Only the following values are supported:
@@ -61,7 +61,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
*
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor info. Data types supported: All
* @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
* @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
* information to reshape the input tensor. Only the following values are supported:
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index bdc5792641..59740b9db9 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -69,9 +69,9 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
* @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
* Data types supported: Same as @p input
@@ -82,9 +82,9 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
* @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
* Data types supported: Same as @p input
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 851292f1e1..38d78971ef 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1883,6 +1883,7 @@ struct GEMMLowpOutputStageInfo
int gemmlowp_max_bound{ 0 }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
};
/** GEMM LHS (Left Hand Side) matrix information */
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index 6b6cb007e3..0bf6ff5c95 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -84,15 +84,21 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty
* per-channel, multipliers and shifts will end up being the same for each
* channel.
*
- * @param[in] input Input tensor.
- * @param[in] weights Weights tensor.
- * @param[in] output Output tensor.
+ * @param[in] input Input tensor info.
+ * @param[in] weights Weights tensor info.
+ * @param[in] output Output tensor info.
+ * @param[in] idx_ofms Dimension index to get OFMs from the weights tensor.
* @param[out] output_multipliers_ptr Pointer to the buffer where to store per-channel multipliers.
* @param[out] output_shifts_ptr Pointer to the buffer where to store per-channel shifts.
*
* @return min and max values for the quantized data type
*/
-void compute_quantized_multipliers_and_shifts(const ITensor *input, const ITensor *weights, const ITensor *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr);
+void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *output,
+ unsigned int idx_ofms,
+ int32_t *output_multipliers_ptr,
+ int32_t *output_shifts_ptr);
} // namespace quantization
} // namespace arm_compute
#endif /* __ARM_COMPUTE_IO_FILE_HANDLER_H__ */