From 1b14c75c0d591c4abe4d2d41b7e4e165fbf58382 Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Mon, 9 Mar 2020 14:29:52 +0000 Subject: COMPMID-2968: Add support for QASYMM8_SIGNED in CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel Signed-off-by: Sheri Zhang Change-Id: I37e6e76dbd5546c0eaedfacd01ea905c37148e8a Signed-off-by: Sheri Zhang Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2861 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- Android.bp | 2 +- arm_compute/core/CL/CLKernels.h | 2 +- ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 91 +++++++++++ ...owpQuantizeDownInt32ToUint8ScaleByFloatKernel.h | 95 ----------- arm_compute/core/Types.h | 1 + .../runtime/CL/functions/CLGEMMLowpOutputStage.h | 5 +- src/core/CL/cl_kernels/gemmlowp.cl | 24 +-- ...GEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 177 +++++++++++++++++++++ ...pQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp | 175 -------------------- src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp | 28 +++- tests/validation/CL/GEMMLowp.cpp | 40 +++++ tests/validation/fixtures/GEMMLowpFixture.h | 103 ++++++++++++ tests/validation/reference/GEMMLowp.cpp | 63 ++++++++ tests/validation/reference/GEMMLowp.h | 8 + 14 files changed, 527 insertions(+), 287 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp delete mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp diff --git a/Android.bp b/Android.bp index f9a41000dd..0d5c9e949d 100644 --- a/Android.bp +++ b/Android.bp @@ -126,11 +126,11 @@ cc_library_static { "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp", "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp", 
"src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp", + "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp", "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp", "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp", "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp", "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp", "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp", "src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp", "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp", diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index f2e16ca139..b265aa2fe7 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -79,11 +79,11 @@ #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" diff --git 
a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h new file mode 100644 index 0000000000..439f569d07 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. + * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Requantize + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * - to the [0..255] range and cast to QASYMM8. + * - to the [-128..127] range and cast to QASYMM8_SIGNED. 
+ */ +class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. 
+ * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h deleted file mode 100644 index 900a8c3b5d..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFLOATKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFLOATKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Requantize - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. 
- */ -class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] multiplier Float multiplier to be multiplied to each element of the input matrix - * @param[in] offset Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFLOATKERNEL_H */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 711b68f236..37a9679a21 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1956,6 +1956,7 @@ struct GEMMLowpOutputStageInfo int32_t gemmlowp_max_bound{ std::numeric_limits::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ std::vector gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ std::vector gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ + float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */ DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ }; diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h index 184d827d4b..05cffa6680 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h +++ 
b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h @@ -217,7 +217,7 @@ public: * * This function calls the following OpenCL kernels: * - * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel + * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel * * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions * after the result is shifted right by result_shift @@ -237,6 +237,7 @@ public: * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. */ + ARM_COMPUTE_DEPRECATED_REL(20.05) void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits::lowest(), int max = std::numeric_limits::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint @@ -251,6 +252,7 @@ public: * * @return a status */ + ARM_COMPUTE_DEPRECATED_REL(20.05) static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits::lowest(), int max = std::numeric_limits::max()); }; /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL. 
@@ -317,6 +319,7 @@ public: * This function calls the following CL kernels: * * -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel + * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel */ diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 3fba781ede..7f2828689a 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -2317,9 +2317,9 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE #endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT) #if defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET) -/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 +/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED * - * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and processes it to obtain the final QASYMM8 value. + * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. * The following computations will be performed by the kernel: * * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier @@ -2327,11 +2327,14 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE * -# Requantize * -# Add offset to each result * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. + * -# Clamp the resulting int32 values: + * - to the [0..255] range and cast to QASYMM8. + * - to the [-128..127] range and cast to QASYMM8_SIGNED. 
* * @attention The offset and scalar scale factor must be passed at compile time using -DRESULT_OFFSET, -DREAL_MULTIPLIER * * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile time + * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE * @note In case the clamping of the result is required, the min and max bounds can be passed at compile time using -DMIN_BOUND and -DMAX_BOUND. * These values can be used to implement "rectified linear unit" activation functions * @@ -2388,19 +2391,20 @@ __kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src #endif // defined(ADD_BIAS) // Convert to float - float16 input_values_f = convert_float4(input_values); - input_values_f = round(input_values_f * (float)REAL_MULTIPLIER + (float)OUTPUT_OFFSET); + float4 input_values_f = convert_float4(input_values); + input_values_f = round(input_values_f * (float)REAL_MULTIPLIER + (float)OUTPUT_OFFSET); - uchar4 res = convert_uchar4_sat(input_values_f); + VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4) + res = CONVERT_SAT(input_values_f, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)); #if defined(MIN_BOUND) - res = max(res, (uchar4)MIN_BOUND); + res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND); #endif // defined(MIN_BOUND) #if defined(MAX_BOUND) - res = min(res, (uchar4)MAX_BOUND); + res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND); #endif // defined(MAX_BOUND) // Store the result - vstore4(res, 0, dst_addr); + vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr); } -#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET) +#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET) \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp new file mode 100644 index 0000000000..5a554f3111 --- /dev/null +++ 
b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2018-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, + const GEMMLowpOutputStageInfo *info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON((info->output_data_type != DataType::QASYMM8) && (info->output_data_type != DataType::QASYMM8_SIGNED)); + ARM_COMPUTE_RETURN_ERROR_ON(info->gemmlowp_max_bound > std::get<1>(quantization::get_min_max_values_from_quantized_data_type(info->output_data_type))); + ARM_COMPUTE_RETURN_ERROR_ON(info->gemmlowp_min_bound < std::get<0>(quantization::get_min_max_values_from_quantized_data_type(info->output_data_type)) + || info->gemmlowp_min_bound > info->gemmlowp_max_bound); + + // Check biases if exist + if(bias != nullptr) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != bias->dimension(0)); + } + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() != info->output_data_type, "Mismatching output data type"); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + } + + return Status{}; +} + +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output, 
DataType output_data_type) +{ + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output, input->clone()->set_data_type(output_data_type)); + + constexpr unsigned int num_elems_processed_per_iteration = 4; + + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output, input->clone()->set_data_type(DataType::QASYMM8)); + + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, + input_access); + + AccessWindowHorizontal output_result_access(output, 0, num_elems_processed_per_iteration); + window_changed = window_changed || update_window_and_padding(win, output_result_access); + output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + + if(bias != nullptr) + { + AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->tensor_shape()[1]); + window_changed = window_changed || update_window_and_padding(win, bias_access); + } + + Status err = (window_changed) ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +} // namespace + +class Coordinates; +CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel() + : _input(nullptr), _bias(nullptr), _output(nullptr) +{ +} + +Status CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, + const GEMMLowpOutputStageInfo *info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, info)); + + return Status{}; +} + +void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + const GEMMLowpOutputStageInfo *info) +{ + // Perform validate step + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), info)); + + _input = input; + _bias = bias; + _output = output; + + auto min = info->gemmlowp_min_bound; + auto max = info->gemmlowp_max_bound; + + // Set the arguments to pass at compile time + CLBuildOptions build_opts; + build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(info->gemmlowp_real_multiplier)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(info->gemmlowp_offset)); + build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); + build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); + + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_float", build_opts.options())); + + // Configure kernel window + auto 
win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), info->output_data_type); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICLKernel::configure_internal(win_config.second); +} + +void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + // Create input window + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_3D(); + + // Setup bias slice + unsigned int idx1 = num_arguments_per_3D_tensor(); + if(_bias != nullptr) + { + Window biases_slice(slice); + biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); + biases_slice.set(Window::DimZ, Window::Dimension(0, 1, 1)); + add_1D_tensor_argument(idx1, _bias, biases_slice); + } + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx1, _output, slice); + enqueue(queue, *this, slice, lws_hint()); + } + while(collapsed.slide_window_slice_3D(slice)); +} +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp deleted file mode 100644 index 7097dc9248..0000000000 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, - int min, int max) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(min > max); - - // Check biases if exist - if(bias != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); - ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != bias->dimension(0)); - } - - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output) -{ - constexpr unsigned int num_elems_processed_per_iteration = 4; - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, input->clone()->set_data_type(DataType::QASYMM8)); - - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, - input_access); - - if(output->total_size() != 0) - { - Window win_out = calculate_max_window(*output, 
Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_result_access(output, 0, num_elems_processed_per_iteration); - window_changed = window_changed || update_window_and_padding(win_out, output_result_access); - - output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - } - - if(bias != nullptr) - { - AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->tensor_shape()[1]); - window_changed = window_changed || update_window_and_padding(win, bias_access); - } - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -class Coordinates; -CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel() - : _input(nullptr), _bias(nullptr), _output(nullptr) -{ -} - -Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, min, max)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), - (bias != nullptr) ? bias->clone().get() : nullptr, - output->clone().get()) - .first); - - return Status{}; -} - -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - float multiplier, int offset, - int min, int max) -{ - // Perform validate step - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr) ? 
bias->info() : nullptr, output->info(), min, max)); - - _input = input; - _bias = bias; - _output = output; - - // Set the arguments to pass at compile time - CLBuildOptions build_opts; - build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(multiplier)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(offset)); - build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); - build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_float", build_opts.options())); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - // Create input window - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - // Setup bias slice - unsigned int idx1 = num_arguments_per_3D_tensor(); - if(_bias != nullptr) - { - Window biases_slice(slice); - biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); - biases_slice.set(Window::DimZ, Window::Dimension(0, 1, 1)); - add_1D_tensor_argument(idx1, _bias, biases_slice); - } - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx1, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); -} -} // namespace arm_compute diff --git 
a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp index e86f303ff4..fbd1820098 100644 --- a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp @@ -24,11 +24,11 @@ #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -90,15 +90,24 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::configure(const ICLTensor * float multiplier, int offset, int min, int max) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, bias, output, multiplier, offset, min, max); + GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); + info.gemmlowp_offset = offset; + info.gemmlowp_real_multiplier = multiplier; + info.gemmlowp_min_bound = min; + info.gemmlowp_max_bound = max; + + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, bias, output, &info); _kernel = std::move(k); } Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { - return CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::validate(input, bias, output, min, max); + GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); + info.gemmlowp_min_bound = min; + info.gemmlowp_max_bound = max; 
+ return CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(input, bias, output, &info); } void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, @@ -165,6 +174,13 @@ void CLGEMMLowpOutputStage::configure(const ICLTensor *input, const ICLTensor *b } break; } + case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, bias, output, &info); + _kernel = std::move(k); + break; + } default: ARM_COMPUTE_ERROR("Unsupported GEMMLowpOutputStage type."); } @@ -202,6 +218,10 @@ Status CLGEMMLowpOutputStage::validate(const ITensorInfo *input, const ITensorIn return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type."); } } + case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: + { + return CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(input, bias, output, &info); + } default: return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported GEMMLowpOutputStage type."); } diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 3d7c76aa2b..8aa81d0962 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -389,6 +389,46 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedP TEST_SUITE_END() // MultGreater1 TEST_SUITE_END() // BoundedReLu TEST_SUITE_END() // QuantizeDownInt32ToInt16ScaleByFixedPoint + +TEST_SUITE(QuantizeDownInt32ScaleByFloat) + +TEST_SUITE(QASYMM8) +using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture = + GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; + +FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), + datasets::TinyShapes()), + framework::dataset::make("result_real_multiplier", 0.33f)), + 
framework::dataset::make("result_offset", 2, 3)), + framework::dataset::make("min", 0)), + framework::dataset::make("max", 255)), + framework::dataset::make("addBias", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed = + GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; +FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), + datasets::TinyShapes()), + framework::dataset::make("result_real_multiplier", 0.33f)), + framework::dataset::make("result_offset", 2, 3)), + framework::dataset::make("min", -128)), + framework::dataset::make("max", 127)), + framework::dataset::make("addBias", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // QuantizeDownInt32ScaleByFloat + TEST_SUITE_END() // OutputStage TEST_SUITE_END() // GEMMLowp TEST_SUITE_END() // CL diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 0207f4c5ae..be9ce96dcb 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -556,6 +556,109 @@ protected: SimpleTensor _reference{}; }; +template +class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture +{ +public: + template + void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias) + { + _target = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias); + _reference = compute_reference(shape, result_real_multiplier, result_offset, min, max, add_bias); + } + +protected: + 
template + void fill(U &&tensor, int i) + { + // To avoid data all being clamped + std::uniform_int_distribution<> distribution(-500, 500); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(DataType data_type, const TensorShape &shape, float result_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias) + { + TensorShape shape_bias(shape[0]); + + // Create tensors + TensorType a = create_tensor(shape, DataType::S32, 1); + TensorType b = create_tensor(shape_bias, DataType::S32, 1); + TensorType c = create_tensor(shape, data_type, 1); + + // create output stage info + GEMMLowpOutputStageInfo info; + info.gemmlowp_max_bound = max; + info.gemmlowp_min_bound = min; + info.gemmlowp_real_multiplier = result_multiplier; + info.gemmlowp_offset = result_offset; + info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT; + info.output_data_type = data_type; + + // Create and configure function + FunctionType output_stage; + output_stage.configure(&a, add_bias ?
&b : nullptr, &c, info); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + a.allocator()->allocate(); + c.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensor + fill(AccessorType(a), 0); + + if(add_bias) + { + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate bias tensor + b.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensor + fill(AccessorType(b), 1); + } + + // Compute GEMM function + output_stage.run(); + return c; + } + + SimpleTensor compute_reference(const TensorShape &shape, float_t result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias) + { + // Create reference + TensorShape shape_bias(shape[0]); + + SimpleTensor a{ shape, DataType::S32, 1 }; + SimpleTensor b{ shape_bias, DataType::S32, 1 }; + + // Fill reference + fill(a, 0); + + const std::vector result_float_multiplier_vec = { result_real_multiplier }; + + if(add_bias) + { + // Fill bias + fill(b, 1); + + return reference::gemmlowp_quantize_down_scale_by_float(a, b, result_float_multiplier_vec, result_offset, min, max); + } + else + { + return reference::gemmlowp_quantize_down_scale_by_float(a, result_float_multiplier_vec, result_offset, min, max); + } + } + + TensorType _target{}; + SimpleTensor _reference{}; +}; + template class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture { diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 99d08e34f1..61617c8aae 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -131,6 +131,39 @@ void 
quantize_down_scale_by_fixedpoint(const SimpleTensor *in, const Simple std::min(std::numeric_limits::max(), result))); } } + +template +void quantize_down_scale_by_float(const SimpleTensor *in, const SimpleTensor *bias, SimpleTensor *dst, std::vector result_real_multiplier, + int32_t result_offset, int32_t min, int32_t max) +{ + const int cols_in = in->shape().x(); + const bool is_per_channel = result_real_multiplier.size() > 1; + + for(int i = 0; i < in->num_elements(); ++i) + { + TIn result = (*in)[i]; + + if(bias != nullptr) + { + result += (*bias)[i % cols_in]; + } + + // Float multiplication + const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0]; + + float_t result_f = static_cast(result) * multiplier + static_cast(result_offset); + result = static_cast(std::round(result_f)); + + // Bounded ReLu + if(min != max) + { + result = std::max(min, std::min(max, result)); + } + + (*dst)[i] = static_cast(std::max(std::numeric_limits::lowest(), + std::min(std::numeric_limits::max(), result))); + } +} } // namespace template @@ -237,6 +270,36 @@ SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor return dst; } +template +SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &in, const SimpleTensor &bias, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max) +{ + SimpleTensor dst(in.shape(), DataTypeExtractor::data_type()); + + quantize_down_scale_by_float(&in, &bias, &dst, result_real_multiplier, result_offset, min, max); + + return dst; +} + +template +SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &in, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max) +{ + SimpleTensor dst(in.shape(), DataTypeExtractor::data_type()); + + quantize_down_scale_by_float(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max); + + return dst; +} + +template SimpleTensor 
gemmlowp_quantize_down_scale_by_float(const SimpleTensor &a, const SimpleTensor &b, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max); +template SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &a, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max); +template SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &a, const SimpleTensor &b, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max); +template SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &a, + std::vector result_real_multiplier, int32_t result_offset, int32_t min, int32_t max); template SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &a, std::vector result_fixedpoint_multiplier, std::vector result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max); template SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &a, const SimpleTensor &b, diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 7d711263e8..5de48dab52 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -59,6 +59,14 @@ SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor template SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &in, const SimpleTensor &bias, std::vector result_fixedpoint_multiplier, std::vector result_shift, int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0); + +template +SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &in, const SimpleTensor &bias, + std::vector result_real_multiplier, int32_t result_offset, int32_t min = 0, int32_t max = 0); + +template +SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor &in, + std::vector result_real_multiplier, int32_t result_offset, int32_t min = 0, int32_t max = 0); } 
// namespace reference } // namespace validation } // namespace test -- cgit v1.2.1