From 4c5469b192665c94118a8a558787cb9cec2d0765 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 21 May 2019 13:32:43 +0100 Subject: COMPMID-2225: Add interface support for new quantized data types. Add support for: -QSYMM8, 8-bit quantized symmetric -QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1236 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- arm_compute/core/CL/CLTypes.h | 21 +- arm_compute/core/CL/ICLTensor.h | 9 +- arm_compute/core/Helpers.h | 12 +- arm_compute/core/NEON/NEAsymm.h | 8 +- .../core/NEON/kernels/NEActivationLayerKernel.h | 1 - .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 2 +- arm_compute/core/PixelValue.h | 13 +- arm_compute/core/QAsymm8.h | 33 --- arm_compute/core/QAsymm8.inl | 43 ---- arm_compute/core/QuantizationInfo.h | 259 +++++++++++++++++++++ arm_compute/core/Types.h | 113 ++------- arm_compute/core/Utils.h | 20 +- arm_compute/runtime/CL/CLSubTensor.h | 3 +- arm_compute/runtime/CL/CLTensor.h | 4 +- arm_compute/runtime/CL/CLTensorAllocator.h | 9 + arm_compute/runtime/CL/CLTunerTypes.h | 1 + arm_compute/runtime/ITensorAllocator.h | 4 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 35 +-- src/core/CL/kernels/CLComparisonKernel.cpp | 11 +- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 11 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 22 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 22 +- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 9 +- .../CL/kernels/CLDequantizationLayerKernel.cpp | 6 +- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 12 +- .../CL/kernels/CLElementwiseOperationKernel.cpp | 16 +- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 9 +- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 11 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 11 +- 
.../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 5 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 16 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 11 +- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 6 +- src/core/CL/kernels/CLRangeKernel.cpp | 5 +- src/core/CL/kernels/CLScaleKernel.cpp | 5 +- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 16 +- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 16 +- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 26 ++- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 11 +- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 23 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 42 ++-- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 10 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 6 +- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp | 2 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 4 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 46 ++-- .../kernels/NEFuseBatchNormalizationKernel.cpp | 3 - .../kernels/NEGEMMMatrixVectorMultiplyKernel.cpp | 4 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 10 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 2 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 6 +- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 27 +-- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 5 +- .../NEON/kernels/NEReductionOperationKernel.cpp | 19 +- src/core/NEON/kernels/NEReverseKernel.cpp | 1 - src/core/NEON/kernels/NEScaleKernel.cpp | 24 +- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 4 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 10 +- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 1 - src/runtime/CL/CLSubTensor.cpp | 7 +- src/runtime/CL/CLTensor.cpp | 7 +- src/runtime/CL/CLTensorAllocator.cpp | 57 ++++- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 14 +- .../CL/functions/CLDirectConvolutionLayer.cpp | 4 +- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 19 +- 
.../CL/functions/CLGEMMConvolutionLayer.cpp | 32 ++- .../CL/functions/CLGEMMDeconvolutionLayer.cpp | 8 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 8 +- src/runtime/CL/functions/CLPoolingLayer.cpp | 4 +- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 22 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 16 +- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 44 ++-- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 8 +- src/runtime/NEON/functions/NEPoolingLayer.cpp | 4 +- .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 6 +- tests/Utils.h | 2 + tests/validate_examples/graph_convolution.cpp | 39 ++-- .../graph_depthwiseconvolution.cpp | 37 ++- tests/validate_examples/graph_fully_connected.cpp | 25 +- tests/validate_examples/graph_validate_utils.h | 9 +- tests/validation/CL/UNIT/TensorAllocator.cpp | 42 ++++ tests/validation/Helpers.cpp | 16 +- tests/validation/UNIT/TensorInfo.cpp | 64 ++++- .../fixtures/NormalizePlanarYUVLayerFixture.h | 4 +- tests/validation/fixtures/RangeFixture.h | 6 +- tests/validation/reference/ConcatenateLayer.cpp | 9 +- tests/validation/reference/Convolution3d.h | 22 +- tests/validation/reference/DeconvolutionLayer.cpp | 2 +- .../validation/reference/DepthConcatenateLayer.cpp | 20 +- .../reference/DepthwiseConvolutionLayer.cpp | 27 +-- .../reference/DepthwiseConvolutionLayer.h | 2 +- tests/validation/reference/DequantizationLayer.cpp | 6 +- tests/validation/reference/FullyConnectedLayer.cpp | 16 +- tests/validation/reference/Im2Col.cpp | 6 +- tests/validation/reference/QuantizationLayer.cpp | 11 +- tests/validation/reference/QuantizationLayer.h | 2 +- tests/validation/reference/Scale.cpp | 6 +- utils/TypePrinter.h | 23 +- utils/Utils.h | 2 + 100 files changed, 1099 insertions(+), 659 deletions(-) delete mode 100644 arm_compute/core/QAsymm8.h delete mode 100644 arm_compute/core/QAsymm8.inl create mode 100644 arm_compute/core/QuantizationInfo.h diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h index 
4a03cc9637..24ae542c7c 100644 --- a/arm_compute/core/CL/CLTypes.h +++ b/arm_compute/core/CL/CLTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_CL_TYPES_H__ #define __ARM_COMPUTE_CL_TYPES_H__ +#include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/GPUTarget.h" #include @@ -53,5 +54,23 @@ struct CLDeviceOptions size_t num_cores; /**< Number of cores */ size_t cache_size; /**< Cache size */ }; + +/** OpenCL quantization data */ +struct CLQuantization +{ + /** Default Constructor */ + CLQuantization() + : scale(nullptr), offset(nullptr) {}; + /** Constructor + * + * @param[in] scale OpenCL scale array + * @param[in] offset OpenCL offset array + */ + CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) + : scale(scale), offset(offset) {}; + + const ICLFloatArray *scale; /**< Quantization scale array */ + const ICLInt32Array *offset; /**< Quantization offset array */ +}; } // namespace arm_compute #endif /* __ARM_COMPUTE_CL_TYPES_H__ */ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h index 0f5dba923b..094a0c3dec 100644 --- a/arm_compute/core/CL/ICLTensor.h +++ b/arm_compute/core/CL/ICLTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,8 @@ #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/CL/CLTypes.h" + #include namespace cl @@ -53,6 +55,11 @@ public: /** Default virtual destructor. */ virtual ~ICLTensor() = default; + /** Interface to be implemented by the child class to return the wrapped quantization info data + * + * @return A wrapped quantization info object. + */ + virtual CLQuantization quantization() const = 0; /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data. 
* * @return A reference to an OpenCL buffer containing the image's data. diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 235657a38a..87b1fdf64c 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -158,24 +158,24 @@ inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy * * @return The bilinear interpolated pixel value */ -inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, QuantizationInfo iq_info, QuantizationInfo oq_info) +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) { ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); const float dx1 = 1.0f - dx; const float dy1 = 1.0f - dy; - const float a00 = iq_info.dequantize(*pixel_ptr); - const float a01 = iq_info.dequantize(*(pixel_ptr + 1)); - const float a10 = iq_info.dequantize(*(pixel_ptr + stride)); - const float a11 = iq_info.dequantize(*(pixel_ptr + stride + 1)); + const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); const float w1 = dx1 * dy1; const float w2 = dx * dy1; const float w3 = dx1 * dy; const float w4 = dx * dy; float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; - return static_cast(oq_info.quantize(res, RoundingPolicy::TO_NEAREST_UP)); + return static_cast(quantize_qasymm8(res, oq_info)); } /** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h index 253d0fdff7..2347c468ab 100644 --- a/arm_compute/core/NEON/NEAsymm.h +++ b/arm_compute/core/NEON/NEAsymm.h @@ -182,7 +182,7 @@ inline uint8_t 
finalize_quantization(int32_t in_value, int result_fixedpoint_mul * * @return Dequantized values in a neon vector */ -inline float32x4x2_t vdequantize(const uint8x8_t &qv, const QuantizationInfo &qi) +inline float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi) { const float scale = qi.scale; const int offset = qi.offset; @@ -205,7 +205,7 @@ inline float32x4x2_t vdequantize(const uint8x8_t &qv, const QuantizationInfo &qi * * @return Dequantized values in a neon vector */ -inline float32x4x4_t vdequantize(const uint8x16_t &qv, const QuantizationInfo &qi) +inline float32x4x4_t vdequantize(const uint8x16_t &qv, const UniformQuantizationInfo &qi) { const float scale = qi.scale; const int offset = qi.offset; @@ -230,7 +230,7 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, const QuantizationInfo &q * * @return A neon vector holding the quantized values */ -inline uint8x8_t vquantize(const float32x4x2_t &qv, const QuantizationInfo &qi) +inline uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi) { const float scale = qi.scale; const int offset = qi.offset; @@ -258,7 +258,7 @@ inline uint8x8_t vquantize(const float32x4x2_t &qv, const QuantizationInfo &qi) * * @return A neon vector holding the quantized values */ -inline uint8x16_t vquantize(const float32x4x4_t &qv, const QuantizationInfo &qi) +inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) { const float scale = qi.scale; const int offset = qi.offset; diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 447f4880ee..9381beaded 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -25,7 +25,6 @@ #define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ #include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/QAsymm8.h" #include 
"arm_compute/core/utils/misc/Traits.h" #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index cbb961f235..daa29fdf4f 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -115,7 +115,7 @@ private: * */ using MulFunctionQASYMM8 = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info); + const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info); MulFunctionFloat *_func_float; MulFunctionInt *_func_int; diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 0ead9db7b1..4bdcad61a2 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -41,11 +41,11 @@ public: } /** Initialize the union with a pixel value of chosen datatype * - * @param[in] v int value. - * @param[in] datatype DataType that @p v have to be stored - * @param[in] quant_info QuantizationInfo to apply in case of QASYMM8 datatype to @p v + * @param[in] v int value. 
+ * @param[in] datatype DataType that @p v have to be stored + * @param[in] qinfo (Optional) QuantizationInfo to apply in case of QASYMM8 datatype to @p v */ - PixelValue(uint64_t v, DataType datatype, QuantizationInfo quant_info = QuantizationInfo()) + PixelValue(uint64_t v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue() { switch(datatype) @@ -57,7 +57,10 @@ public: value.s8 = static_cast(v); break; case DataType::QASYMM8: - value.u8 = sqcvt_qasymm8_f32(v, quant_info.scale, quant_info.offset); + value.u8 = quantize_qasymm8(static_cast(v), qinfo); + break; + case DataType::QSYMM8: + value.s8 = quantize_qsymm8(static_cast(v), qinfo); break; case DataType::U16: value.u16 = static_cast(v); diff --git a/arm_compute/core/QAsymm8.h b/arm_compute/core/QAsymm8.h deleted file mode 100644 index 2fa4029807..0000000000 --- a/arm_compute/core/QAsymm8.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_QASYMM8_H__ -#define __ARM_COMPUTE_QASYMM8_H__ - -#include "arm_compute/core/Rounding.h" -#include - -namespace arm_compute -{ -using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */ -} -#include "arm_compute/core/QAsymm8.inl" -#endif /* __ARM_COMPUTE_QASYMM8_H__ */ diff --git a/arm_compute/core/QAsymm8.inl b/arm_compute/core/QAsymm8.inl deleted file mode 100644 index 77109c4010..0000000000 --- a/arm_compute/core/QAsymm8.inl +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include -#include - -namespace arm_compute -{ -#ifndef DOXYGEN_SKIP_THIS -inline qasymm8_t sqcvt_qasymm8_f32(float value, float scale, int offset, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) -{ - int quantized = arm_compute::round(value / scale, rounding_policy) + offset; - quantized = std::max(0, std::min(quantized, 255)); - return quantized; -} - -inline float scvt_f32_qasymm8(qasymm8_t value, float scale, int offset) -{ - float dequantized = (static_cast(value) - offset) * scale; - return dequantized; -} -#endif /* DOXYGEN_SKIP_THIS */ -} diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h new file mode 100644 index 0000000000..94f7e76c3e --- /dev/null +++ b/arm_compute/core/QuantizationInfo.h @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_QUANTIZATION_INFO_H__ +#define __ARM_COMPUTE_QUANTIZATION_INFO_H__ + +#include "arm_compute/core/Rounding.h" + +#include +#include + +namespace arm_compute +{ +using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */ +using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */ + +/** Quantization info when assuming per layer quantization */ +struct UniformQuantizationInfo +{ + /** Default constructor */ + UniformQuantizationInfo() + : scale(0.f), offset(0) + { + } + /** Constructor + * + * @param[in] scale Quantization scale + * @param[in] offset Quantization offset + */ + UniformQuantizationInfo(float scale, int32_t offset) + : scale(scale), offset(offset) + { + } + /** Checks if the scale and offset are both zero */ + bool empty() const + { + return (scale == 0) && (offset == 0); + } + + float scale; + int32_t offset; +}; + +/** Quantization information */ +struct QuantizationInfo +{ + /** Default constructor */ + QuantizationInfo() noexcept + : scale(), + offset() + { + } + /** Construct quantization info. + * + * @note Used for symmetric quantization + * + * @param[in] scale Scale. + */ + QuantizationInfo(float scale) + : scale(1, scale), offset() + { + } + /** Construct quantization info. + * + * @note Used for asymmetric quantization + * + * @param[in] scale Scale. + * @param[in] offset Offset. + */ + QuantizationInfo(float scale, int offset) + : scale(1, scale), offset(1, offset) + { + } + /** Construct quantization info. + * + * @note Used for symmetric per channel quantization + * + * @param[in] scale Scale. + */ + QuantizationInfo(std::vector scale) + : scale(scale), offset() + { + } + /** Indicates whether this QuantizationInfo has valid settings or not + * + * @return True if the this has invalid settings. 
+ */ + bool empty() const + { + return scale.empty() && offset.empty(); + } + /** Return per layer quantization info + * + * @return Uniform quantization information in case of empty information zero is returned in the respective fields + */ + UniformQuantizationInfo uniform() const + { + UniformQuantizationInfo uqinfo; + uqinfo.scale = scale.empty() ? 0 : scale[0]; + uqinfo.offset = offset.empty() ? 0 : offset[0]; + + return uqinfo; + } + + std::vector scale; /**< Vector containing scaling factors */ + std::vector offset; /**< Vector containing zero offsets */ +}; + +/** Check whether two quantization info are equal. + * + * @param[in] lhs RHS quantization info. + * @param[in] rhs LHS quantization info. + * + * @return True if the given quantization info is the same. + */ +inline bool operator==(const QuantizationInfo &lhs, const QuantizationInfo &rhs) +{ + return (lhs.scale == rhs.scale) && (lhs.offset == rhs.offset); +} + +/** Check whether two quantization info are not equal. + * + * @param[in] lhs RHS quantization info. + * @param[in] rhs LHS quantization info. + * + * @return True if the given quantization info is the same. + */ +inline bool operator!=(const QuantizationInfo &lhs, const QuantizationInfo &rhs) +{ + return !(operator==(lhs, rhs)); +} + +/** Check whether two quantization info are equal. + * + * @param[in] lhs RHS quantization info. + * @param[in] rhs LHS quantization info. + * + * @return True if the given quantization info is the same. + */ +inline bool operator==(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs) +{ + return (lhs.scale == rhs.scale) && (lhs.offset == rhs.offset); +} + +/** Check whether two quantization info are not equal. + * + * @param[in] lhs RHS quantization info. + * @param[in] rhs LHS quantization info. + * + * @return True if the given quantization info is the same. 
+ */ +inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs) +{ + return !(operator==(lhs, rhs)); +} + +/** Quantize a value given a asymmetric quantization scheme + * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up + * + * @return Quantized value + */ +inline uint8_t quantize_qasymm8(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +{ + int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; + quantized = std::max(0, std::min(quantized, 255)); + return quantized; +} + +/** Quantize a value given a asymmetric quantization scheme + * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up + * + * @return Quantized value + */ +inline uint8_t quantize_qasymm8(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +{ + UniformQuantizationInfo uqinfo = qinfo.uniform(); + int quantized = arm_compute::round(value / uqinfo.scale, rounding_policy) + uqinfo.offset; + quantized = std::max(0, std::min(quantized, 255)); + return quantized; +} + +/** Quantize a value given a symmetric quantization scheme + * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * + * @return Quantized value + */ +inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo) +{ + int quantized = arm_compute::round(value / qinfo.uniform().scale, RoundingPolicy::TO_NEAREST_UP); + quantized = std::max(-128, std::min(quantized, 127)); + return quantized; +} + +/** Dequantize a value given a asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * 
@param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_qasymm8(uint8_t value, const UniformQuantizationInfo &qinfo) +{ + return (static_cast(value) - qinfo.offset) * qinfo.scale; +} + +/** Dequantize a value given a asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_qasymm8(uint8_t value, const QuantizationInfo &qinfo) +{ + UniformQuantizationInfo uqinfo = qinfo.uniform(); + return (static_cast(value) - uqinfo.offset) * uqinfo.scale; +} + +/** Dequantize a value given a symmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_qsymm8(int8_t value, const QuantizationInfo &qinfo) +{ + return value * qinfo.uniform().scale; +} +} // namespace arm_compute +#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */ \ No newline at end of file diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 972d6ef3c5..1787e68130 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -25,8 +25,7 @@ #define __ARM_COMPUTE_TYPES_H__ #include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/QAsymm8.h" -#include "arm_compute/core/Rounding.h" +#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" @@ -73,20 +72,22 @@ enum class Format /** Available data types */ enum class DataType { - UNKNOWN, /**< Unknown data type */ - U8, /**< unsigned 8-bit number */ - S8, /**< signed 8-bit number */ - QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number */ - U16, /**< unsigned 16-bit number */ - S16, /**< signed 16-bit number */ - U32, /**< unsigned 32-bit number */ - S32, 
/**< signed 32-bit number */ - U64, /**< unsigned 64-bit number */ - S64, /**< signed 64-bit number */ - F16, /**< 16-bit floating-point number */ - F32, /**< 32-bit floating-point number */ - F64, /**< 64-bit floating-point number */ - SIZET /**< size_t */ + UNKNOWN, /**< Unknown data type */ + U8, /**< unsigned 8-bit number */ + S8, /**< signed 8-bit number */ + QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ + QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number */ + QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ + U16, /**< unsigned 16-bit number */ + S16, /**< signed 16-bit number */ + U32, /**< unsigned 32-bit number */ + S32, /**< signed 32-bit number */ + U64, /**< unsigned 64-bit number */ + S64, /**< signed 64-bit number */ + F16, /**< 16-bit floating-point number */ + F32, /**< 32-bit floating-point number */ + F64, /**< 64-bit floating-point number */ + SIZET /**< size_t */ }; /** Available Sampling Policies */ @@ -160,86 +161,6 @@ enum class ComparisonOperation LessEqual /**< Less equal comparison ( \f$ x <= y \f$ ) */ }; -/** Quantization settings (used for QASYMM8 data type) */ -struct QuantizationInfo -{ - /** Default constructor */ - QuantizationInfo() noexcept - : scale(0.0f), - offset(0) - { - } - - /** Construct quantization info. - * - * @param[in] scale Scale. - * @param[in] offset Offset. - */ - QuantizationInfo(float scale, int offset) - : scale(scale), offset(offset) - { - } - - /** Check whether equal to a given quantization info. - * - * @param[in] other Other quantization info. - * - * @return True if the given quantization info is the same. - */ - bool operator==(const QuantizationInfo &other) const - { - return scale == other.scale && offset == other.offset; - } - - /** Check whether not equal to a given quantization info. - * - * @param[in] other Other quantization info. - * - * @return True if the given quantization info is not the same. 
- */ - bool operator!=(const QuantizationInfo &other) const - { - return !(*this == other); - } - - float scale; /**< scale */ - int offset; /**< offset */ - - /** Quantizes a value using the scale/offset in this QuantizationInfo - * - * @param[in] value Value to quantize. - * @param[in] rounding_policy Policy to use when rounding. - * - * @return the quantized value. - */ - qasymm8_t quantize(float value, RoundingPolicy rounding_policy) const - { - ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::quantize: scale == 0"); - return sqcvt_qasymm8_f32(value, scale, offset, rounding_policy); - } - - /** Dequantizes a value using the scale/offset in this QuantizationInfo - * - * @param[in] value Value to dequantize. - * - * @return the original value before quantization. - */ - float dequantize(qasymm8_t value) const - { - ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::dequantize: scale == 0"); - return scvt_f32_qasymm8(value, scale, offset); - } - - /** Indicates whether this QuantizationInfo has valid settings or not - * - * @return True if the this has invalid settings. 
- */ - bool empty() const - { - return scale == 0; - } -}; - /** Container for valid region of a window */ struct ValidRegion { diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 1de0df6096..8630eeee23 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -111,7 +111,9 @@ inline size_t data_size_from_type(DataType data_type) { case DataType::U8: case DataType::S8: + case DataType::QSYMM8: case DataType::QASYMM8: + case DataType::QSYMM8_PER_CHANNEL: return 1; case DataType::U16: case DataType::S16: @@ -183,7 +185,9 @@ inline size_t element_size_from_data_type(DataType dt) { case DataType::S8: case DataType::U8: + case DataType::QSYMM8: case DataType::QASYMM8: + case DataType::QSYMM8_PER_CHANNEL: return 1; case DataType::U16: case DataType::S16: @@ -521,7 +525,9 @@ inline DataType get_promoted_data_type(DataType dt) return DataType::U32; case DataType::S16: return DataType::S32; + case DataType::QSYMM8: case DataType::QASYMM8: + case DataType::QSYMM8_PER_CHANNEL: case DataType::F16: case DataType::U32: case DataType::S32: @@ -999,7 +1005,9 @@ inline bool is_data_type_quantized(DataType dt) { switch(dt) { + case DataType::QSYMM8: case DataType::QASYMM8: + case DataType::QSYMM8_PER_CHANNEL: return true; default: return false; @@ -1059,14 +1067,14 @@ inline size_t num_of_elements_in_range(const float start, const float end, const /** Returns true if the value can be represented by the given data type * - * @param[in] val value to be checked - * @param[in] dt data type that is checked - * @param[in] quant_info quantization info if the data type is QASYMM8 + * @param[in] val value to be checked + * @param[in] dt data type that is checked + * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8 * * @return true if the data type can hold the value. 
*/ template -bool check_value_range(T val, DataType dt, QuantizationInfo quant_info = QuantizationInfo()) +bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) { switch(dt) { @@ -1074,8 +1082,8 @@ bool check_value_range(T val, DataType dt, QuantizationInfo quant_info = Quantiz return ((static_cast(val) == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max()); case DataType::QASYMM8: { - double min = static_cast(quant_info.dequantize(0)); - double max = static_cast(quant_info.dequantize(std::numeric_limits::max())); + double min = static_cast(dequantize_qasymm8(0, qinfo)); + double max = static_cast(dequantize_qasymm8(std::numeric_limits::max(), qinfo)); return ((double)val >= min && (double)val <= max); } case DataType::S8: diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h index 9c37f8be7c..1625aa5cb6 100644 --- a/arm_compute/runtime/CL/CLSubTensor.h +++ b/arm_compute/runtime/CL/CLSubTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -88,6 +88,7 @@ public: ITensorInfo *info() const override; ITensorInfo *info() override; const cl::Buffer &cl_buffer() const override; + CLQuantization quantization() const override; protected: // Inherited methods overridden: diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h index c47d2be1b0..65ff4f2bba 100644 --- a/arm_compute/runtime/CL/CLTensor.h +++ b/arm_compute/runtime/CL/CLTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensorAllocator; class ITensorInfo; @@ -66,6 +67,7 @@ public: TensorInfo *info() const override; TensorInfo *info() override; const cl::Buffer &cl_buffer() const override; + CLQuantization quantization() const override; protected: // Inherited methods overridden: diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h index 302bd6d52a..f942478ada 100644 --- a/arm_compute/runtime/CL/CLTensorAllocator.h +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -24,9 +24,11 @@ #ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__ #define __ARM_COMPUTE_CLTENSORALLOCATOR_H__ +#include "arm_compute/runtime/CL/CLArray.h" #include "arm_compute/runtime/CL/CLMemory.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CL/OpenCL.h" #include @@ -67,6 +69,11 @@ public: * @return pointer to the CL data. */ const cl::Buffer &cl_data() const; + /** Wrapped quantization info data accessor + * + * @return A wrapped quantization info object. + */ + CLQuantization quantization() const; /** Enqueue a map operation of the allocated buffer on the given queue. * @@ -137,6 +144,8 @@ private: CLMemory _memory; /**< OpenCL memory */ uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. 
*/ CLTensor *_owner; /**< Owner of the allocator */ + CLFloatArray _scale; + CLInt32Array _offset; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h index 7d13b6d3fa..20c026e4dc 100644 --- a/arm_compute/runtime/CL/CLTunerTypes.h +++ b/arm_compute/runtime/CL/CLTunerTypes.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/Utility.h" + #include namespace arm_compute diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h index bb708f0b97..f829cf25ab 100644 --- a/arm_compute/runtime/ITensorAllocator.h +++ b/arm_compute/runtime/ITensorAllocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -96,5 +96,5 @@ private: TensorInfo _info; /**< Tensor's metadata. */ size_t _alignment; /**< Tensor's alignment in bytes */ }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */ diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d601dfc20d..65e6561b0a 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -122,42 +122,43 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act int a_const_int = 0; int b_const_int = 0; + const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(dt); // Create quantized version of constants a, b if needed - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { - a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP); - b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + a_const_int = 
quantize_qasymm8(a_const, iq_info); + b_const_int = quantize_qasymm8(b_const, iq_info); } - const bool is_logistic_activation_quantized = is_data_type_quantized_asymmetric(dt) && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; + const bool is_logistic_activation_quantized = is_quantized_asymmetric && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; // Set build options CLBuildOptions build_opts; build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - const int o1 = input->info()->quantization_info().offset; - const float s1 = input->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + // Quantized value of 0 corresponds to the offset o1 - build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(o1))); - build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); - build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(o1))); + build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(iq_info.offset))); + build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale))); + build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(iq_info.offset))); // Set scale and offset of the input and output if they have different quantization info - if(is_data_type_quantized_asymmetric(dt) && output != nullptr) + if(is_quantized_asymmetric && output != nullptr) { - const float s2 = 
output->info()->quantization_info().scale; - const int o2 = output->info()->quantization_info().offset; + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - if(o1 != o2 || s1 != s2) + if(iq_info != oq_info) { - build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); - build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(o2))); + build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(oq_info.scale))); + build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(oq_info.offset))); } } } @@ -171,7 +172,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act // Create kernel std::string kernel_name = std::string("activation_layer"); - if(is_data_type_quantized_asymmetric(dt)) + if(is_quantized_asymmetric) { kernel_name += is_logistic_activation_quantized ? std::string("_logistic_qa8") : std::string("_qa8"); } diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 4f44851ef8..628f9f18e7 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -134,10 +134,13 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp build_opts.emplace("-DOP_NAME=" + lower_string(operation_name)); if(is_data_type_quantized_asymmetric(input1->info()->data_type())) { - build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo 
iq2_info = input2->info()->quantization_info().uniform(); + + build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); kernel_name += "_quantized"; } diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 1cae3712dc..5e1bbe944f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -99,10 +99,13 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + 
float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index cd25bb1e7f..615327a7cc 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -251,30 +251,34 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y)); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + 
support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 758e99b77e..e32faa10df 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -213,30 +213,34 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = 
_input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1))); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const 
int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 28d4ff2759..0312a57664 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -72,9 +72,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu _input = input; _output = output; - const DataLayout data_layout = input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const DataLayout data_layout = input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Create kernel CLBuildOptions build_opts; @@ -96,7 +97,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout())); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()), - "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), + 
"-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); _kernel = static_cast(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options())); diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 78cc5596dd..0b066837a9 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -95,10 +95,12 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o } ICLKernel::configure_internal(win); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 12affa9880..3e158a52ff 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -452,16 +452,20 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL // Set static kernel arguments if(is_data_type_quantized_asymmetric(data_type)) { + const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform(); + const 
UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform(); + int output_multiplier = 0; int output_shift = 0; - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1; - _kernel.setArg(idx++, -_input->info()->quantization_info().offset); - _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); - _kernel.setArg(idx++, _output->info()->quantization_info().offset); + _kernel.setArg(idx++, -iqinfo.offset); + _kernel.setArg(idx++, -wqinfo.offset); + _kernel.setArg(idx++, oqinfo.offset); _kernel.setArg(idx++, output_multiplier); _kernel.setArg(idx++, output_shift); } diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 414b040f4c..1d9c71555a 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -134,12 +134,16 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i build_opts.add_option("-DOP=" + operation_string); if(is_data_type_quantized_asymmetric(input1.data_type())) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1.quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2.quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output.quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + 
float_to_string_with_full_precision(input1.quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2.quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output.quantization_info().scale)); + const UniformQuantizationInfo iq1info = input1.quantization_info().uniform(); + const UniformQuantizationInfo iq2info = input2.quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output.quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1info.scale)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } return build_opts; } diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 11a4292270..0ff2f1343a 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -104,9 +104,12 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const // Add static arguments if(is_quantized) { + const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform(); + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, -_input0->info()->quantization_info().offset); - _kernel.setArg(idx++, -_input1->info()->quantization_info().offset); + _kernel.setArg(idx++, -iq0_info.offset); + _kernel.setArg(idx++, -iq1_info.offset); } // Configure kernel window diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index e3f2a96281..4da3e245c0 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -133,10 +133,13 @@ void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + 
float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 8caa927f8b..10d6e68cd9 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -162,10 +162,11 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size const std::pair convolved_dims = scaled_dimensions(input_width, input_height, kernel_dims.width, kernel_dims.height, conv_info, dilation); // Im2Col configuration - std::string kernel_name = "im2col_generic_"; - CLBuildOptions build_opts; - unsigned int num_elems_processed_per_iteration = 1; - bool is_padding_required_nchw = false; + std::string kernel_name = "im2col_generic_"; + CLBuildOptions build_opts; + unsigned int num_elems_processed_per_iteration = 1; + bool is_padding_required_nchw = false; + const UniformQuantizationInfo qinfo = input->quantization_info().uniform(); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->element_size())); @@ -185,7 +186,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if(num_groups > 1, "-DNUM_GROUPS=" + support::cpp11::to_string(num_groups)); - build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->quantization_info().offset), "-DPAD_VALUE=0"); + 
build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); if(data_layout == DataLayout::NHWC) diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 90330163ea..b255ba346f 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -123,8 +123,9 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso std::string kernel_name = "normalize_planar_yuv_layer_"; if(is_data_type_quantized(dt)) { - build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset))); - build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale))); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset))); + build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(qinfo.scale))); kernel_name += "q8_"; } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index dda9b16083..050bbb810b 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -181,12 +181,16 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I CLBuildOptions build_opts; if(is_quantized) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - 
build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(iq1_info.scale)); + build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(iq2_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(oq_info.scale)); kernel_name += "_quantized"; } else diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 7ccbda9be3..8eaf5bf76f 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -205,10 +205,13 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + 
float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Check output dimensions diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 374b22eab1..22d4e3345f 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -93,10 +93,12 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out } ICLKernel::configure_internal(win); + const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(output->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + 
support::cpp11::to_string(std::max(input_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index eb8822b957..a22f5cb4cb 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -116,8 +116,9 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step)); if(is_data_type_quantized_asymmetric(output->info()->data_type())) { - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(qinfo.scale)); kernel_name += "_quantized"; } // Create kernel diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index cd89d1c6db..488313fd12 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -206,8 +206,9 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT"); if(call_quantized_kernel) { - build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + 
support::cpp11::to_string(qinfo.offset)); } std::string interpolation_name = string_from_interpolation_policy(policy); diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index e2d988103c..a9c08703c0 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -233,15 +233,16 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor _output = output; _sum = sum; - const DataType dt = input->info()->data_type(); - const size_t reduction_dim_size = input->info()->dimension(0); + const DataType dt = input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + const size_t reduction_dim_size = input->info()->dimension(0); // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)); build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16"); build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); - build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? 
std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : @@ -338,9 +339,10 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); // Note: output should always have a scale of 1/256 and offset 0 - const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); - const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); - const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); + const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); + const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), @@ -357,7 +359,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_options_if(is_quantized_asymmetric, - prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); // Create kernel std::string kernel_name = is_quantized_asymmetric ? 
"softmax_layer_norm_quantized" : "softmax_layer_norm"; diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 5f266c5ffa..bd4ff2c735 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -116,12 +116,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + 
build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 54edaafa29..a3ac102564 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -138,16 +138,22 @@ void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info(), input3->info(), input4->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + 
float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq3_info = input3->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq4_info = input4->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset)); + build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); + build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); + build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 6c32cd2371..b577944a03 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -109,10 +109,13 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor 
*input, unsigned i if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } // Create kernel diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index bc6a281353..3f71553926 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -320,15 +319,15 @@ typename std::enable_if::value, void>::type NEActivat Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const QuantizationInfo qi_in = 
_input->info()->quantization_info(); - const QuantizationInfo qi_out = _output->info()->quantization_info(); - const qasymm8x16_t va = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset)); - const qasymm8x16_t vb = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset)); - const qasymm8_t a = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset); - const qasymm8_t b = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset); - const qasymm8_t const_0 = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -415,9 +414,9 @@ typename std::enable_if::value, void>::type NEActivat } else if(act == ActivationFunction::LOGISTIC) { - float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset); + float tmp_f = dequantize_qasymm8(in, qi_in); tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset); + tmp = quantize_qasymm8(tmp_f, qi_out); } else { diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index ca79a0a419..164026c1ab 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ 
b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -165,25 +165,26 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor const auto window_end_x = static_cast(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t voffseto = vdupq_n_f32(output_offset); + const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale); + const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale); + const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset); + const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset); + const float32x4_t voffseto = vdupq_n_f32(oq_info.offset); if(is_broadcast_across_x) { - const bool is_broadcast_input_2 = input2_win.x().step() == 0; - Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; - Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; - const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; - const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? 
in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const bool is_broadcast_input_2 = input2_win.x().step() == 0; + Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; + Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; + const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; + const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); // Clear X Dimension on execution window as we handle manually non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -252,7 +253,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor for(; x < window_end_x; ++x) { const float afs = static_cast(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, broadcast_input, non_broadcast_input, output); @@ -263,9 +264,6 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -328,9 +326,9 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor // Compute left-over elements for(; x < 
window_end_x; ++x) { - const float afs = static_cast((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale; - const float bfs = static_cast((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + const float afs = static_cast((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; + const float bfs = static_cast((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 45e1562d8d..8874b52e19 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -87,10 +87,14 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); Iterator output(out, window); + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates &) { - const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast(input1.ptr())), in1->info()->quantization_info()); - const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast(input2.ptr())), in2->info()->quantization_info()); + const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast(input1.ptr())), iq1_info); + const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast(input2.ptr())), iq2_info); const float32x4x4_t ta3 = { @@ -102,7 +106,7 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 } }; -
const uint8x16_t result = vquantize(ta3, out->info()->quantization_info()); + const uint8x16_t result = vquantize(ta3, oq_info); vst1q_u8(reinterpret_cast(output.ptr()), result); }, diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b360e9e6be..c9c70d6500 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -53,9 +53,9 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo Iterator input(in, window); Iterator output(out, window); - const DataType dt = in->info()->data_type(); - const QuantizationInfo &input_qinfo = in->info()->quantization_info(); - const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + const DataType dt = in->info()->data_type(); + const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index fdafc2da90..385be04e4a 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -51,8 +51,8 @@ public: static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation) { - const int input_offset = -input->info()->quantization_info().offset; - const int weights_offset = -weights->info()->quantization_info().offset; + const int input_offset = -input->info()->quantization_info().uniform().offset; + const int weights_offset = 
-weights->info()->quantization_info().uniform().offset; const int input_stride_x = input->info()->strides_in_bytes().x(); const int input_stride_y = input->info()->strides_in_bytes().y(); diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index 88f8b31a35..53789e2472 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -92,7 +92,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) auto zero = static_cast(0); if(std::is_same::value) { - zero = _input->info()->quantization_info().offset; + zero = _input->info()->quantization_info().uniform().offset; } execute_window_loop(window_out, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 1520225249..a6dc0977d2 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -97,7 +97,7 @@ inline void store_result(float16_t *ptr, const float32x4x4_t &v) template void run_dequantization(const ITensor *input, ITensor *output, const Window &window) { - const QuantizationInfo &qinfo = input->info()->quantization_info(); + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -129,7 +129,7 @@ void run_dequantization(const ITensor *input, ITensor *output, const Window &win for(; x < window_end_x; ++x) { uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(qinfo.dequantize(val)); + *(out_ptr + x) = static_cast(dequantize_qasymm8(val, qinfo)); } }, in, out); diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 33457e1fca..0fe05d2044 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ 
b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -142,9 +142,9 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar } template -inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { - return qinfo.quantize(elementwise_arithm_op_scalar(a, b), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(elementwise_arithm_op_scalar(a, b), qinfo); } template @@ -253,7 +253,7 @@ inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputS } template -inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { ARM_COMPUTE_UNUSED(qinfo); return elementwise_comp_op_scalar(a, b); @@ -567,7 +567,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const } void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo), + uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo), int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t, float32x4_t, float32x4_t, const bool), int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, @@ -587,12 +587,11 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const auto window_end_x = static_cast(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo 
output_qinfo = out->info()->quantization_info().uniform(); // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero) - const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); + const float32x4_t voffseto = vdupq_n_f32(output_qinfo.offset + 0.5f); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale); if(is_broadcast_across_x) { @@ -603,8 +602,8 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset); const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale); @@ -628,31 +627,30 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset); - const float bfs = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? 
afs : bfs, - out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo); + const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo); + *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, output_qinfo); } }, broadcast_input, non_broadcast_input, output); } else { + const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform(); + // Input1 quantization info - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); + const int32x4_t voffset1 = vdupq_n_s32(input1_qinfo.offset); + const float32x4_t vscale1 = vdupq_n_f32(input1_qinfo.scale); // Input2 quantization info - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); + const int32x4_t voffset2 = vdupq_n_s32(input2_qinfo.offset); + const float32x4_t vscale2 = vdupq_n_f32(input2_qinfo.scale); // Clear X Dimension on execution window as we handle manually input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -667,9 +665,9 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o vscale1, vscale2, voffseto, invvscaleo); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset); - const float bfs = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, 
input2_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo); + const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo); + *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e699bac556..d45e3ce56a 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -27,12 +27,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/Window.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp index cba3390641..0e77ead72b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp @@ -179,8 +179,8 @@ void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiplyinfo()->quantization_info().offset; - const int weights_offset = -_input1->info()->quantization_info().offset; + const int input_offset = -_input0->info()->quantization_info().uniform().offset; + const int weights_offset = -_input1->info()->quantization_info().uniform().offset; const int input_w = _input0->info()->dimension(0); const int input_h = _input0->info()->dimension(1); diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index b8e204cfd8..8efab7da33 100644 --- 
a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 34af0cf3fd..874259bbb7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -279,7 +279,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) const int pad_top = _conv_info.pad_top(); const int stride_x = _conv_info.stride().first; const int stride_y = _conv_info.stride().second; - const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0; + const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? 
_input->info()->quantization_info().uniform().offset : 0; Window window_in_out(window); // The first three dimensions of the input and output are increased by the inner loops diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index fa16484cd3..c313b23ad3 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -174,7 +174,7 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in) } void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info) + const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info) { const auto input1 = static_cast(input1_ptr); const auto input2 = static_cast(input2_ptr); @@ -187,7 +187,7 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info); const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info); - const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset); + const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset }; const float32x4x4_t out_f32x4x4 = { @@ -660,7 +660,7 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo execute_window_loop(collapsed, [&](const Coordinates &) { (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale, - _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info()); + _input1->info()->quantization_info().uniform(), 
_input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform()); collapsed.slide_window_slice_3D(slice_input1); collapsed.slide_window_slice_3D(slice_input2); }, diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index ac2ffa1988..62c9ca0d5e 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -562,6 +562,10 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con const int scale_step_x = (pool_stride_x == 1) ? 2 : 1; + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); + const bool have_different_qinfo = input_qinfo != output_qinfo; + execute_window_loop(window, [&](const Coordinates & id) { const auto top_data = vld1q_u8(reinterpret_cast(input_top_ptr + input.offset())); @@ -640,9 +644,7 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con } } - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - if(input_qinfo != output_qinfo) + if(have_different_qinfo) { const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo); lower_res = vget_low_u8(requantized_output); @@ -814,8 +816,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 
0 : pool_pad_bottom); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); @@ -1598,6 +1600,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates & id) { uint8_t res = 0; @@ -1671,11 +1676,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c } // Store result - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - res = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale, - output_qinfo.offset) : - res; + res = (input_qinfo != output_qinfo) ? 
quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res; *(reinterpret_cast(output.ptr())) = res; }, input, output); @@ -1698,9 +1699,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom); - const float32x4_t half_scale_v = vdupq_n_f32(0.5f); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const float32x4_t half_scale_v = vdupq_n_f32(0.5f); + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 4deeb1c7cc..0aa34cd411 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -107,6 +107,7 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); + const UniformQuantizationInfo uqinfo = qinfo.uniform(); #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -127,12 +128,12 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio int x = window_start_x; for(; x <= (window_end_x - window_step); x += window_step) { - wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo)); + wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo)); } // Compute left-over elements for(; x < 
window_end_x; ++x) { - output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy); + output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy); } }, input, output); diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index c6e853659c..1bfef27d49 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -542,6 +542,9 @@ struct RedOpX_qasymm8 inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + auto vec_res_value1 = vdupq_n_u32(static_cast(0.f)); auto vec_res_value2 = vdupq_n_u32(static_cast(0.f)); auto vec_res_value3 = vdupq_n_u32(static_cast(0.f)); @@ -584,8 +587,8 @@ struct RedOpX_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -673,7 +676,7 @@ struct RedOpX_qasymm8 res *= wrapper::vgetlane(carry_res, 3); //re-quantize result - res = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset); + res = quantize_qasymm8(res, iq_info); *(output.ptr()) = static_cast(res); break; } @@ -877,6 +880,8 @@ struct RedOpYZW_qasymm8 { ARM_COMPUTE_UNUSED(out_slice); + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + execute_window_loop(in_slice, [&](const Coordinates &) { uint32x4x4_t vec_res_idx{ { 0 } }; @@ -932,8 +937,8 @@ struct RedOpYZW_qasymm8 } case 
ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -1004,8 +1009,8 @@ struct RedOpYZW_qasymm8 } else if(op == ReductionOperation::PROD) { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale)); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale)); //re-quantize vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4); diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 36398cf89a..99328deecd 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 003f472486..e99b97bbe5 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -218,7 +218,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset const int input_height = input->info()->dimension(2); T border_value; - if(use_padding && border_mode != BorderMode::REPLICATE ) + if(use_padding && border_mode != BorderMode::REPLICATE) { // configure() sets top border to 0 
for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE border_value = *reinterpret_cast(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w); @@ -235,9 +235,9 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1; - const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = input->info()->quantization_info(); - const QuantizationInfo oq_info = output->info()->quantization_info(); + const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { @@ -295,11 +295,11 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset //dequantize quantized input if(is_quantized) { - float inp00 = iq_info.dequantize(a00); - float inp01 = iq_info.dequantize(a01); - float inp10 = iq_info.dequantize(a10); - float inp11 = iq_info.dequantize(a11); - res = static_cast(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP)); + float inp00 = dequantize_qasymm8(a00, iq_info); + float inp01 = dequantize_qasymm8(a01, iq_info); + float inp10 = dequantize_qasymm8(a10, iq_info); + float inp11 = dequantize_qasymm8(a11, iq_info); + res = static_cast(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info)); } else { @@ -651,9 +651,9 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1]; const size_t in_stride = in_stide_in_bytes / _input->info()->element_size(); - const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo 
iq_info = _input->info()->quantization_info(); - const QuantizationInfo oq_info = _output->info()->quantization_info(); + const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); switch(_input->info()->data_type()) { diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e9417ece44..4144a1877b 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -595,7 +595,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons const int start_x = in.info()->valid_region().anchor.x(); const int input_width = in.info()->valid_region().shape.x(); - const float scale_beta = -beta * in.info()->quantization_info().scale; + const float scale_beta = -beta * in.info()->quantization_info().uniform().scale; Iterator in_it(&in, window); Iterator max_it(&max, window); diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index aea6875f20..28f655c529 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - 
const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 09a4a11b66..383c2b8b99 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp index d0e7d760ff..0f362507cf 100644 --- a/src/runtime/CL/CLSubTensor.cpp +++ b/src/runtime/CL/CLSubTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,11 @@ const cl::Buffer &CLSubTensor::cl_buffer() const return _parent->cl_buffer(); } +CLQuantization CLSubTensor::quantization() const +{ + return _parent->quantization(); +} + ICLTensor *CLSubTensor::parent() { return _parent; diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp index dd277384c7..732689e7ec 100644 --- a/src/runtime/CL/CLTensor.cpp +++ b/src/runtime/CL/CLTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -47,6 +47,11 @@ const cl::Buffer &CLTensor::cl_buffer() const return _allocator.cl_data(); } +CLQuantization CLTensor::quantization() const +{ + return _allocator.quantization(); +} + CLTensorAllocator *CLTensor::allocator() { return &_allocator; diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 101e4f1cd4..63aa1ba9ea 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -34,6 +34,14 @@ const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer(); namespace { +/** Helper function used to allocate the backing memory of a tensor + * + * @param[in] context OpenCL context to use + * @param[in] size Size of the allocation + * @param[in] alignment Alignment of the allocation + * + * @return A wrapped memory region + */ std::unique_ptr allocate_region(const cl::Context &context, size_t size, cl_uint alignment) { // Try fine-grain SVM @@ -54,11 +62,47 @@ std::unique_ptr allocate_region(const cl::Context &context, siz } return region; } +/** Clears quantization arrays + * + * @param[in, out] scale Quantization scale array + * @param[in, out] offset Quantization offset array + */ +void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset) +{ + // Clear arrays + scale = CLFloatArray(); + offset = CLInt32Array(); +} +/** Helper function used to create quantization data arrays + * + * @param[in, out] scale Quantization scale array + * @param[in, out] offset Quantization offset array + * @param[in] qinfo Quantization info + * @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes + * + * @return A pair (scale, offset) containing the respective allocated and filled arrays + */ +void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size) +{ + clear_quantization_arrays(scale, offset); + + // Create scale array + const size_t num_elements = 
qinfo.scale.size(); + const size_t element_size = sizeof(decltype(qinfo.scale)::value_type); + scale = CLFloatArray(num_elements + pad_size); + scale.resize(num_elements); + CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale.data()); +} } // namespace CLTensorAllocator::CLTensorAllocator(CLTensor *owner) - : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner) + : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner), _scale(), _offset() +{ +} + +CLQuantization CLTensorAllocator::quantization() const { + return { &_scale, &_offset }; } uint8_t *CLTensorAllocator::data() @@ -73,6 +117,7 @@ const cl::Buffer &CLTensorAllocator::cl_data() const void CLTensorAllocator::allocate() { + // Allocate tensor backing memory if(_associated_memory_group == nullptr) { if(_memory.region() != nullptr && _memory.cl_region()->cl_data().get() != nullptr) @@ -91,6 +136,15 @@ void CLTensorAllocator::allocate() { _associated_memory_group->finalize_memory(_owner, _memory, info().total_size()); } + + // Allocate and fill the quantization parameter arrays + if(info().data_type() == DataType::QSYMM8_PER_CHANNEL) + { + const size_t pad_size = 0; + populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size); + } + + // Lock allocator info().set_is_resizable(false); } @@ -98,6 +152,7 @@ void CLTensorAllocator::free() { _mapping = nullptr; _memory.set_region(nullptr); + clear_quantization_arrays(_scale, _offset); info().set_is_resizable(true); } diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index 97b0a01331..e912740d69 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -130,7 +130,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor PixelValue 
&&zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type())) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); } @@ -288,6 +288,10 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0); const size_t conv_size = conv_w * conv_h; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + // Im2Col configuration TensorShape shape_im2col = input->info()->tensor_shape(); shape_im2col.set(0, patch_size); @@ -319,9 +323,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w // Output staged configuration if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const UniformQuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? 
iq_info : oq_info; - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = iq_info.scale * wq_info.scale / output_quant_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -334,8 +338,8 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w PixelValue zero_w(static_cast(0)); if(_is_quantized) { - zero_in = PixelValue(static_cast(input->info()->quantization_info().offset)); - zero_w = PixelValue(static_cast(weights->info()->quantization_info().offset)); + zero_in = PixelValue(static_cast(iq_info.offset)); + zero_w = PixelValue(static_cast(wq_info.offset)); } BorderSize border_size = _v2mm_kernel.border_size(); _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in); diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index c451bd4b4c..bfc6ff158c 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig PixelValue &&zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type())) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value); diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 7b9229c4ae..87d4c56a0e 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -41,10 +41,13 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I { if(is_data_type_quantized_asymmetric(input.data_type())) { + const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset); + const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset); + const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset); // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info), @@ -88,8 +91,8 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor const QuantizationInfo input_quantization_info = 
input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -230,11 +233,15 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w // Configure output stage for asymmetric quantized types if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp 
b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 03d516f703..4e518fcfd5 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -115,8 +115,8 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); _mm_gemmlowp.configure(input, weights, biases, output, gemm_info); @@ -151,8 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens std::unique_ptr input_qa = input->clone(); std::unique_ptr weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Perform validation step on GEMMLowp return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info); @@ -190,6 +190,10 @@ void 
CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(idx_width); const unsigned int kernel_height = weights->info()->dimension(idx_height); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + _is_prepared = weights_info.retain_internal_weights(); _original_weights = weights; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -281,9 +285,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * // Configure output stage for quantized case if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const auto output_quant_info = (output->info()->total_size() == 0) ? 
iq_info : oq_info; - const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -298,8 +302,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 
255 : a_const_int; @@ -387,6 +391,10 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI // In case of F16, fused bias will be used in GEMM const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); @@ -468,9 +476,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_quantized) { - const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info(); + const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info; - const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; @@ -486,8 +494,8 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? 
output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp index bcb91e052c..36a120e4ef 100644 --- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp @@ -277,11 +277,15 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / _gemmlowp_final.info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _gemmlowp_final.info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier(0); int output_shift(0); quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, _gemmlowp_final.info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_final.allocator()->allocate(); } diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 049db1d461..875e3a2a00 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -77,8 +77,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor _is_prepared = false; _original_b = b; 
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _a_offset = a->info()->quantization_info().offset; - _b_offset = b->info()->quantization_info().offset; + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; // Get the GPU target const GPUTarget gpu_target = CLScheduler::get().target(); @@ -213,8 +213,8 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); - int32_t a_offset = a->quantization_info().offset; - int32_t b_offset = b->quantization_info().offset; + int32_t a_offset = a->quantization_info().uniform().offset; + int32_t b_offset = b->quantization_info().uniform().offset; const ITensorInfo *matrix_a_info = a; const ITensorInfo *matrix_b_info = b; diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index cbe1ce3b47..086017a7fd 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin PixelValue pixel_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) { - pixel_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + pixel_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } switch(input->info()->data_layout()) { diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 3bb69b1ffc..4bc8439d93 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -72,7 +72,7 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor _memory_group.manage(&_accumulator); _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info())); _accumulator.info()->set_data_layout(accum_layout); - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } if(!_is_nchw) @@ -109,13 +109,15 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor // Configure biases accumulation if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? 
iq_info : output->info()->quantization_info().uniform(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, output_quant_info.offset); + _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset); _accumulator.allocator()->allocate(); } else if(_has_bias) @@ -459,13 +461,15 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh // Output staged configuration if(_is_quantized) { - const QuantizationInfo output_quant_info = output->info()->quantization_info(); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, output_quant_info.offset); + _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset); _output_reshaped.allocator()->allocate(); } @@ -483,8 +487,8 @@ void NEDepthwiseConvolutionLayer::configure(ITensor 
*input, const ITensor *weigh PixelValue zero_w(static_cast(0)); if(_is_quantized) { - zero_in = PixelValue(static_cast(input->info()->quantization_info().offset)); - zero_w = PixelValue(static_cast(weights->info()->quantization_info().offset)); + zero_in = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); + zero_w = PixelValue(static_cast(weights->info()->quantization_info().uniform().offset)); } BorderSize border_size = _v2mm_kernel.border_size(); _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index e1a17db6d4..7a74a7ea90 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -44,8 +44,8 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset); + const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, -input.quantization_info().uniform().offset); + const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, -weights.quantization_info().uniform().offset); // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info), @@ -90,8 +90,8 @@ void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *we const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo 
weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -227,11 +227,15 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh // Configure output stage for asymmetric quantized types if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index a2c4e8a8b1..c011ddd18f 100644 --- 
a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -109,15 +109,15 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->info()->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); + const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = weights->info()->quantization_info().uniform(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); + weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input_quantization_info : output->info()->quantization_info(); + const UniformQuantizationInfo oqinfo = (output->info()->total_size() == 0) ? 
iqinfo : output->info()->quantization_info().uniform(); - float multiplier = input_quantization_info.scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -132,10 +132,10 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w }; if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 
255 : a_const_int; _is_activationlayer_enabled = false; @@ -143,7 +143,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w GEMMLowpOutputStageInfo output_info; output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = output_quant_info.offset; + output_info.gemmlowp_offset = oqinfo.offset; output_info.gemmlowp_multiplier = output_multiplier; output_info.gemmlowp_shift = output_shift; output_info.gemmlowp_min_bound = min_activation; @@ -152,8 +152,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w _mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info)); // Revert back QuantizatioInfo as input and weights could be used in other convolution layers - input->info()->set_quantization_info(input_quantization_info); - weights->info()->set_quantization_info(weights_quantization_info); + input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset)); + weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, wqinfo.offset)); } else { @@ -174,17 +174,17 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->quantization_info(); + const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform(); std::unique_ptr input_qa = input->clone(); std::unique_ptr weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - 
weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input_qa->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); + weights_qa->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input_quantization_info : output->quantization_info(); + const UniformQuantizationInfo oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info().uniform(); - float multiplier = input_quantization_info.scale * weights->quantization_info().scale / output_quant_info.scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -199,16 +199,16 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens }; if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 
255 : a_const_int; } GEMMLowpOutputStageInfo output_info; output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = output_quant_info.offset; + output_info.gemmlowp_offset = oqinfo.offset; output_info.gemmlowp_multiplier = output_multiplier; output_info.gemmlowp_shift = output_shift; output_info.gemmlowp_min_bound = min_activation; diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 54f49a6707..d8773e37ab 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -61,8 +61,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _mtx_b_reshape_kernel = nullptr; // Set internal variables - _a_offset = a->info()->quantization_info().offset; - _b_offset = b->info()->quantization_info().offset; + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); _is_prepared = false; @@ -224,8 +224,8 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso TensorInfo tmp_b_info{}; TensorInfo mm_result_s32_info{}; - int32_t a_offset = a->quantization_info().offset; - int32_t b_offset = b->quantization_info().offset; + int32_t a_offset = a->quantization_info().uniform().offset; + int32_t b_offset = b->quantization_info().uniform().offset; const bool reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); bool fuse_output_stage = gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE; diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index cbfd68485f..d92086d08d 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ 
b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,7 +55,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay PixelValue zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value); break; diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 049bf66689..0499d9930f 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -72,9 +72,9 @@ std::unique_ptr create_convolver(const ITensor // Create quantized convolver if(data_type == DataType::QASYMM8) { - const QuantizationInfo &input_qinfo = input->info()->quantization_info(); - const QuantizationInfo &weights_qinfo = weights->info()->quantization_info(); - const QuantizationInfo &output_qinfo = output->info()->quantization_info(); + const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform(); // Check that quantization info are in the range [0, 255] ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255); diff --git a/tests/Utils.h b/tests/Utils.h index 7c55a3ef50..d6e4a88e77 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -355,6 +355,8 @@ void 
store_value_with_data_type(void *ptr, T value, DataType data_type) *reinterpret_cast(ptr) = value; break; case DataType::S8: + case DataType::QSYMM8: + case DataType::QSYMM8_PER_CHANNEL: *reinterpret_cast(ptr) = value; break; case DataType::U16: diff --git a/tests/validate_examples/graph_convolution.cpp b/tests/validate_examples/graph_convolution.cpp index b17cb2efdd..1ab6691e57 100644 --- a/tests/validate_examples/graph_convolution.cpp +++ b/tests/validate_examples/graph_convolution.cpp @@ -158,30 +158,27 @@ public: */ void consume_parameters(ExampleParams &common_params) { - common_params.input.width = width->value(); - common_params.input.height = height->value(); - common_params.input.fm = channels->value(); - common_params.input.batch = batch->value(); - common_params.input.quant_info.scale = scale->value(); - common_params.input.quant_info.offset = offset->value(); - common_params.input.npy = input_npy->value(); - common_params.input.range_low = input_range_low->value(); - common_params.input.range_high = input_range_high->value(); - - common_params.weights.width = weights_width->value(); - common_params.weights.height = weights_height->value(); - common_params.weights.fm = OFM->value(); - common_params.weights.npy = weights_npy->value(); - common_params.weights.quant_info.scale = weights_scale->value(); - common_params.weights.quant_info.offset = weights_offset->value(); - common_params.weights.range_low = weights_range_low->value(); - common_params.weights.range_high = weights_range_high->value(); + common_params.input.width = width->value(); + common_params.input.height = height->value(); + common_params.input.fm = channels->value(); + common_params.input.batch = batch->value(); + common_params.input.quant_info = QuantizationInfo(scale->value(), offset->value()); + common_params.input.npy = input_npy->value(); + common_params.input.range_low = input_range_low->value(); + common_params.input.range_high = input_range_high->value(); + + 
common_params.weights.width = weights_width->value(); + common_params.weights.height = weights_height->value(); + common_params.weights.fm = OFM->value(); + common_params.weights.npy = weights_npy->value(); + common_params.weights.quant_info = QuantizationInfo(weights_scale->value(), weights_offset->value()); + common_params.weights.range_low = weights_range_low->value(); + common_params.weights.range_high = weights_range_high->value(); common_params.bias.npy = bias_npy->value(); - common_params.output.quant_info.scale = output_scale->value(); - common_params.output.quant_info.offset = output_offset->value(); - common_params.output.npy = output_npy->value(); + common_params.output.quant_info = QuantizationInfo(output_scale->value(), output_offset->value()); + common_params.output.npy = output_npy->value(); common_params.convolution.padding_mode = padding_mode->value(); common_params.convolution.padding_top = padding_top->value(); diff --git a/tests/validate_examples/graph_depthwiseconvolution.cpp b/tests/validate_examples/graph_depthwiseconvolution.cpp index 1f5627a10d..3ea33e1deb 100644 --- a/tests/validate_examples/graph_depthwiseconvolution.cpp +++ b/tests/validate_examples/graph_depthwiseconvolution.cpp @@ -158,29 +158,26 @@ public: */ void consume_parameters(ExampleParams &common_params) { - common_params.input.width = width->value(); - common_params.input.height = height->value(); - common_params.input.fm = channels->value(); - common_params.input.batch = batch->value(); - common_params.input.quant_info.scale = scale->value(); - common_params.input.quant_info.offset = offset->value(); - common_params.input.npy = input_npy->value(); - common_params.input.range_low = input_range_low->value(); - common_params.input.range_high = input_range_high->value(); - - common_params.weights.width = weights_width->value(); - common_params.weights.height = weights_height->value(); - common_params.weights.npy = weights_npy->value(); - common_params.weights.range_low = 
weights_range_low->value(); - common_params.weights.range_high = weights_range_high->value(); - common_params.weights.quant_info.scale = weights_scale->value(); - common_params.weights.quant_info.offset = weights_offset->value(); + common_params.input.width = width->value(); + common_params.input.height = height->value(); + common_params.input.fm = channels->value(); + common_params.input.batch = batch->value(); + common_params.input.quant_info = QuantizationInfo(scale->value(), offset->value()); + common_params.input.npy = input_npy->value(); + common_params.input.range_low = input_range_low->value(); + common_params.input.range_high = input_range_high->value(); + + common_params.weights.width = weights_width->value(); + common_params.weights.height = weights_height->value(); + common_params.weights.npy = weights_npy->value(); + common_params.weights.range_low = weights_range_low->value(); + common_params.weights.range_high = weights_range_high->value(); + common_params.weights.quant_info = QuantizationInfo(weights_scale->value(), weights_offset->value()); common_params.bias.npy = bias_npy->value(); - common_params.output.quant_info.scale = output_scale->value(); - common_params.output.quant_info.offset = output_offset->value(); - common_params.output.npy = output_npy->value(); + common_params.output.quant_info = QuantizationInfo(output_scale->value(), output_offset->value()); + common_params.output.npy = output_npy->value(); common_params.convolution.padding_mode = padding_mode->value(); common_params.convolution.padding_top = padding_top->value(); diff --git a/tests/validate_examples/graph_fully_connected.cpp b/tests/validate_examples/graph_fully_connected.cpp index dfa15edd6d..645fa8b124 100644 --- a/tests/validate_examples/graph_fully_connected.cpp +++ b/tests/validate_examples/graph_fully_connected.cpp @@ -102,20 +102,17 @@ public: */ void consume_parameters(ExampleParams &common_params) { - common_params.input.width = width->value(); - 
common_params.input.batch = batch->value(); - common_params.input.quant_info.scale = input_scale->value(); - common_params.input.quant_info.offset = input_offset->value(); - common_params.input.range_low = input_range_low->value(); - common_params.input.range_high = input_range_high->value(); - - common_params.weights.quant_info.scale = weights_scale->value(); - common_params.weights.quant_info.offset = weights_offset->value(); - common_params.weights.range_low = weights_range_low->value(); - common_params.weights.range_high = weights_range_high->value(); - - common_params.output.quant_info.scale = output_scale->value(); - common_params.output.quant_info.offset = output_offset->value(); + common_params.input.width = width->value(); + common_params.input.batch = batch->value(); + common_params.input.quant_info = QuantizationInfo(input_scale->value(), input_offset->value()); + common_params.input.range_low = input_range_low->value(); + common_params.input.range_high = input_range_high->value(); + + common_params.weights.quant_info = QuantizationInfo(weights_scale->value(), weights_offset->value()); + common_params.weights.range_low = weights_range_low->value(); + common_params.weights.range_high = weights_range_high->value(); + + common_params.output.quant_info = QuantizationInfo(output_scale->value(), output_offset->value()); common_params.data_type = data_type->value(); common_params.fully_connected.num_outputs = num_outputs->value(); diff --git a/tests/validate_examples/graph_validate_utils.h b/tests/validate_examples/graph_validate_utils.h index 485d3c1409..13cc4fa683 100644 --- a/tests/validate_examples/graph_validate_utils.h +++ b/tests/validate_examples/graph_validate_utils.h @@ -453,16 +453,17 @@ public: { ARM_COMPUTE_ERROR_ON(tensor.data_type() != arm_compute::DataType::QASYMM8); - std::mt19937 gen(seed); + const UniformQuantizationInfo qinfo = tensor.quantization_info().uniform(); - uint8_t qasymm8_low = tensor.quantization_info().quantize(low, 
RoundingPolicy::TO_NEAREST_UP); - uint8_t qasymm8_high = tensor.quantization_info().quantize(high, RoundingPolicy::TO_NEAREST_UP); + uint8_t qasymm8_low = quantize_qasymm8(low, qinfo); + uint8_t qasymm8_high = quantize_qasymm8(high, qinfo); + std::mt19937 gen(seed); std::uniform_int_distribution distribution(qasymm8_low, qasymm8_high); for(int i = 0; i < tensor.num_elements(); ++i) { - tensor[i] = tensor.quantization_info().quantize(distribution(gen), RoundingPolicy::TO_NEAREST_UP); + tensor[i] = quantize_qasymm8(distribution(gen), qinfo); } } /** Fill S32 tensor with Random values. diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp index e5b37d8387..4b8e105240 100644 --- a/tests/validation/CL/UNIT/TensorAllocator.cpp +++ b/tests/validation/CL/UNIT/TensorAllocator.cpp @@ -66,6 +66,7 @@ TEST_SUITE(CL) TEST_SUITE(UNIT) TEST_SUITE(TensorAllocator) +/** Validates import memory interface when importing cl buffer objects */ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL) { // Init tensor info @@ -106,6 +107,7 @@ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS); } +/** Validates import memory interface when importing malloced memory */ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) { // Check if import extension is supported @@ -168,6 +170,7 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) } #if !defined(BARE_METAL) +/** Validates import memory interface when importing memory mapped objects */ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) { // Check if import extension is supported @@ -235,6 +238,45 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) } #endif // !defined(BARE_METAL) +/** Validates symmetric per channel quantization */ +TEST_CASE(Symm8PerChannelQuantizationInfo, framework::DatasetMode::ALL) +{ + // Create tensor + CLTensor tensor; + const 
std::vector scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f }; + const TensorInfo info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale)); + tensor.allocator()->init(info); + + // Check quantization information + ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().scale.empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().scale.size() == scale.size(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().offset.empty(), framework::LogLevel::ERRORS); + + CLQuantization quantization = tensor.quantization(); + ARM_COMPUTE_ASSERT(quantization.scale != nullptr); + ARM_COMPUTE_ASSERT(quantization.offset != nullptr); + + // Check OpenCL quantization arrays before allocating + ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == 0, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS); + + // Check OpenCL quantization arrays after allocating + tensor.allocator()->allocate(); + ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == scale.size(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS); + + // Validate that the scale values are the same + auto cl_scale_buffer = quantization.scale->cl_buffer(); + void *mapped_ptr = CLScheduler::get().queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size()); + auto cl_scale_ptr = static_cast(mapped_ptr); + for(unsigned int i = 0; i < scale.size(); ++i) + { + ARM_COMPUTE_EXPECT(cl_scale_ptr[i] == scale[i], framework::LogLevel::ERRORS); + } + CLScheduler::get().queue().enqueueUnmapMemObject(cl_scale_buffer, mapped_ptr); +} + TEST_SUITE_END() // TensorAllocator TEST_SUITE_END() // UNIT TEST_SUITE_END() // CL diff --git a/tests/validation/Helpers.cpp 
b/tests/validation/Helpers.cpp index 71a674b515..31d6bfae07 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -110,22 +110,24 @@ CannyEdgeParameters canny_edge_parameters() SimpleTensor convert_from_asymmetric(const SimpleTensor &src) { - const QuantizationInfo &quantization_info = src.quantization_info(); - SimpleTensor dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() }; + const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform(); + SimpleTensor dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() }; for(int i = 0; i < src.num_elements(); ++i) { - dst[i] = quantization_info.dequantize(src[i]); + dst[i] = dequantize_qasymm8(src[i], quantization_info); } return dst; } SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info) { - SimpleTensor dst{ src.shape(), DataType::QASYMM8, 1, quantization_info }; + SimpleTensor dst{ src.shape(), DataType::QASYMM8, 1, quantization_info }; + const UniformQuantizationInfo &qinfo = quantization_info.uniform(); + for(int i = 0; i < src.num_elements(); ++i) { - dst[i] = quantization_info.quantize(src[i], RoundingPolicy::TO_NEAREST_UP); + dst[i] = quantize_qasymm8(src[i], qinfo); } return dst; } @@ -267,8 +269,8 @@ std::pair get_quantized_bounds(const QuantizationInfo &quant_info, flo { ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower equal than max"); - const int min_bound = quant_info.quantize(min, RoundingPolicy::TO_NEAREST_UP); - const int max_bound = quant_info.quantize(max, RoundingPolicy::TO_NEAREST_UP); + const int min_bound = quantize_qasymm8(min, quant_info.uniform()); + const int max_bound = quantize_qasymm8(max, quant_info.uniform()); return std::pair { min_bound, max_bound }; } diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp index b78f656932..96d07da2b4 100644 --- a/tests/validation/UNIT/TensorInfo.cpp +++ 
b/tests/validation/UNIT/TensorInfo.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -122,6 +122,68 @@ TEST_CASE(TensorInfoBuild, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(info.tensor_shape() == TensorShape(13U, 15U), framework::LogLevel::ERRORS); } +/** Validates empty quantization info */ +TEST_CASE(NoQuantizationInfo, framework::DatasetMode::ALL) +{ + // Create tensor info + const TensorInfo info(TensorShape(32U, 16U), 1, DataType::F32); + + // Check quantization information + ARM_COMPUTE_EXPECT(info.quantization_info().empty(), framework::LogLevel::ERRORS); +} + +/** Validates symmetric quantization info */ +TEST_CASE(SymmQuantizationInfo, framework::DatasetMode::ALL) +{ + // Create tensor info + const float scale = 0.25f; + const TensorInfo info(TensorShape(32U, 16U), 1, DataType::QSYMM8, QuantizationInfo(scale)); + + // Check quantization information + ARM_COMPUTE_EXPECT(!info.quantization_info().empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!info.quantization_info().scale.empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().scale.size() == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().offset.empty(), framework::LogLevel::ERRORS); + + UniformQuantizationInfo qinfo = info.quantization_info().uniform(); + ARM_COMPUTE_EXPECT(qinfo.scale == scale, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(qinfo.offset == 0.f, framework::LogLevel::ERRORS); +} + +/** Validates asymmetric quantization info */ +TEST_CASE(AsymmQuantizationInfo, framework::DatasetMode::ALL) +{ + // Create tensor info + const float scale = 0.25f; + const int32_t offset = 126; + const TensorInfo info(TensorShape(32U, 16U), 1, DataType::QSYMM8, QuantizationInfo(scale, offset)); + + // Check quantization information + ARM_COMPUTE_EXPECT(!info.quantization_info().empty(), framework::LogLevel::ERRORS); + 
ARM_COMPUTE_EXPECT(!info.quantization_info().scale.empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().scale.size() == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!info.quantization_info().offset.empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().offset.size() == 1, framework::LogLevel::ERRORS); + + UniformQuantizationInfo qinfo = info.quantization_info().uniform(); + ARM_COMPUTE_EXPECT(qinfo.scale == scale, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(qinfo.offset == offset, framework::LogLevel::ERRORS); +} + +/** Validates symmetric per channel quantization info */ +TEST_CASE(SymmPerChannelQuantizationInfo, framework::DatasetMode::ALL) +{ + // Create tensor info + const std::vector scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f }; + const TensorInfo info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale)); + + // Check quantization information + ARM_COMPUTE_EXPECT(!info.quantization_info().empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!info.quantization_info().scale.empty(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().scale.size() == scale.size(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(info.quantization_info().offset.empty(), framework::LogLevel::ERRORS); +} + TEST_SUITE_END() // TensorInfoValidation TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h index 93e4e64830..b46bd3c407 100644 --- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h +++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -71,7 +71,7 @@ protected: const QuantizationInfo quant_info = src_tensor.quantization_info(); std::pair bounds = get_quantized_bounds(quant_info, -1.f, 1.0f); std::uniform_int_distribution<> distribution(bounds.first, bounds.second); - std::uniform_int_distribution<> distribution_std(quant_info.quantize(0.1f, RoundingPolicy::TO_NEAREST_UP), bounds.second); + std::uniform_int_distribution<> distribution_std(quantize_qasymm8(0.1f, quant_info.uniform()), bounds.second); library->fill(src_tensor, distribution, 0); library->fill(mean_tensor, distribution, 1); library->fill(std_tensor, distribution_std, 2); diff --git a/tests/validation/fixtures/RangeFixture.h b/tests/validation/fixtures/RangeFixture.h index c192eee14f..4862069694 100644 --- a/tests/validation/fixtures/RangeFixture.h +++ b/tests/validation/fixtures/RangeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -95,7 +95,9 @@ protected: end += std::max(half(1.0f), static_cast(distribution(gen))) * step; return utility::clamp(end); case DataType::QASYMM8: - return utility::clamp(end + (float)distribution(gen) * step, qinfo_out.dequantize(0), qinfo_out.dequantize(std::numeric_limits::max())); + return utility::clamp(end + (float)distribution(gen) * step, + dequantize_qasymm8(0, qinfo_out.uniform()), + dequantize_qasymm8(std::numeric_limits::max(), qinfo_out.uniform())); default: return 0; } diff --git a/tests/validation/reference/ConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp index af818a576c..6c90d74a0f 100644 --- a/tests/validation/reference/ConcatenateLayer.cpp +++ b/tests/validation/reference/ConcatenateLayer.cpp @@ -72,10 +72,13 @@ SimpleTensor widthconcatenate_layer(const std::vector> &srcs, const int offset = u * height * depth + d * height + r; if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info()) { - 
std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [src, dst](T t) + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst.quantization_info().uniform(); + + std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t) { - const float dequantized_input = src.quantization_info().dequantize(t); - return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP); + const float dequantized_input = dequantize_qasymm8(t, iq_info); + return quantize_qasymm8(dequantized_input, oq_info); }); src_ptr += width; } diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h index 2e5fefd99a..30be25f504 100644 --- a/tests/validation/reference/Convolution3d.h +++ b/tests/validation/reference/Convolution3d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -9,14 +9,14 @@ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - *asymm_int_mult + * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, asymm_int_multDAMAGES OR OTHER + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
@@ -101,12 +101,16 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig const TB *b_ptr = bias.data() + b_offset; T *out_ptr = out.data() + o_offset; - const int input_offset = -in.quantization_info().offset; - const float input_scale = in.quantization_info().scale; - const int weights_offset = -weights.quantization_info().offset; - const float weights_scale = weights.quantization_info().scale; - const int output_offset = out.quantization_info().offset; - const float output_scale = out.quantization_info().scale; + const UniformQuantizationInfo iq_info = in.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out.quantization_info().uniform(); + + const int input_offset = -iq_info.offset; + const float input_scale = iq_info.scale; + const int weights_offset = -wq_info.offset; + const float weights_scale = wq_info.scale; + const int output_offset = oq_info.offset; + const float output_scale = oq_info.scale; int output_multiplier = 0; int output_shift = 0; diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index 916792479f..af59830722 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -68,7 +68,7 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens if(src.data_type() == DataType::QASYMM8) { - const uint8_t quantized_zero = src.quantization_info().offset; + const uint8_t quantized_zero = src.quantization_info().uniform().offset; std::fill_n(scaled.data(), scaled.num_elements(), quantized_zero); } else diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp index 22271a0d10..d6e6e78187 100644 --- a/tests/validation/reference/DepthConcatenateLayer.cpp +++ b/tests/validation/reference/DepthConcatenateLayer.cpp @@ -55,6 +55,7 @@ 
SimpleTensor depthconcatenate_layer(const std::vector> &srcs, { return tensor.quantization_info() != dst.quantization_info(); }; + if(srcs[0].data_type() == DataType::QASYMM8 && std::any_of(srcs.cbegin(), srcs.cend(), have_different_quantization_info)) { for(int b = 0; b < batches; ++b) @@ -64,11 +65,14 @@ SimpleTensor depthconcatenate_layer(const std::vector> &srcs, int slice = 0; for(const auto &src : srcs) { - auto ptr_slice = static_cast(dst(Coordinates(0, 0, slice, b))); - const auto num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z()); - std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T) + auto ptr_slice = static_cast(dst(Coordinates(0, 0, slice, b))); + const auto num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z()); + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst.quantization_info().uniform(); + + std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [&](T) { - return dst.quantization_info().quantize(src.quantization_info().dequantize(0), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(dequantize_qasymm8(0, iq_info), oq_info); }); slice += src.shape().z(); } @@ -102,10 +106,12 @@ SimpleTensor depthconcatenate_layer(const std::vector> &srcs, { if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info()) { - std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [src, dst](T t) + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst.quantization_info().uniform(); + std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [&](T t) { - const float dequantized_input = src.quantization_info().dequantize(t); - return dst.quantization_info().quantize(dequantized_input, 
RoundingPolicy::TO_NEAREST_UP); + const float dequantized_input = dequantize_qasymm8(t, iq_info); + return quantize_qasymm8(dequantized_input, oq_info); }); src_ptr += width; } diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp index 90ecffbbca..2192d681b6 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp @@ -50,7 +50,7 @@ namespace reference */ template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info) + unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info) { ARM_COMPUTE_UNUSED(out_quant_info); @@ -126,22 +126,19 @@ SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTe template <> SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info) + const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info) { // if no explicit quantization has been set you the same as src - if(out_quant_info == QuantizationInfo(0.0f, 0)) - { - out_quant_info = src.quantization_info(); - } - SimpleTensor dst{ dst_shape, src.data_type(), 1, out_quant_info }; + const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? 
src.quantization_info() : out_quant_info; + SimpleTensor dst{ dst_shape, src.data_type(), 1, dst_qinfo }; // Create reference - const int input_offset = -src.quantization_info().offset; - const float input_scale = src.quantization_info().scale; - const int weights_offset = -weights.quantization_info().offset; - const float weights_scale = weights.quantization_info().scale; - const int output_offset = dst.quantization_info().offset; - const float output_scale = dst.quantization_info().scale; + const int input_offset = -src.quantization_info().uniform().offset; + const float input_scale = src.quantization_info().uniform().scale; + const int weights_offset = -weights.quantization_info().uniform().offset; + const float weights_scale = weights.quantization_info().uniform().scale; + const int output_offset = dst.quantization_info().uniform().offset; + const float output_scale = dst.quantization_info().uniform().scale; int output_multiplier; int output_shift; @@ -224,10 +221,10 @@ SimpleTensor depthwise_convolution(const SimpleTensor &src, co } template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info); template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info); } // namespace reference } // namespace validation } // namespace test diff --git 
a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h index ac70de02ca..ee323fa8df 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.h +++ b/tests/validation/reference/DepthwiseConvolutionLayer.h @@ -37,7 +37,7 @@ namespace reference { template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), QuantizationInfo out_quant_info = QuantizationInfo(0.0f, 0)); + unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), const QuantizationInfo &out_quant_info = QuantizationInfo(0.0f, 0)); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index df50c14ec7..286a609d79 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -34,14 +34,14 @@ namespace reference template SimpleTensor dequantization_layer(const SimpleTensor &src) { - const DataType dst_data_type = std::is_same::value ? DataType::F32 : DataType::F16; - const QuantizationInfo &quantization_info = src.quantization_info(); + const DataType dst_data_type = std::is_same::value ? 
DataType::F32 : DataType::F16; + const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform(); SimpleTensor dst{ src.shape(), dst_data_type }; for(int i = 0; i < src.num_elements(); ++i) { - dst[i] = static_cast(quantization_info.dequantize(src[i])); + dst[i] = static_cast(dequantize_qasymm8(src[i], quantization_info)); } return dst; diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp index 07ddf6d308..cd84b9cfd1 100644 --- a/tests/validation/reference/FullyConnectedLayer.cpp +++ b/tests/validation/reference/FullyConnectedLayer.cpp @@ -67,12 +67,16 @@ void vector_matrix_multiply(const SimpleTensor &src, const SimpleTensor &w const TB *bias_ptr = bias.data(); T *dst_ptr = dst.data() + offset_dst; - const int input_offset = -src.quantization_info().offset; - const float input_scale = src.quantization_info().scale; - const int weights_offset = -weights.quantization_info().offset; - const float weights_scale = weights.quantization_info().scale; - const int output_offset = dst.quantization_info().offset; - const float output_scale = dst.quantization_info().scale; + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst.quantization_info().uniform(); + + const int input_offset = -iq_info.offset; + const float input_scale = iq_info.scale; + const int weights_offset = -wq_info.offset; + const float weights_scale = wq_info.scale; + const int output_offset = oq_info.offset; + const float output_scale = oq_info.scale; int output_multiplier = 0; int output_shift = 0; diff --git a/tests/validation/reference/Im2Col.cpp b/tests/validation/reference/Im2Col.cpp index 076b2aba07..4d63696e67 100644 --- a/tests/validation/reference/Im2Col.cpp +++ b/tests/validation/reference/Im2Col.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. 
+ * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,7 @@ void im2col_nchw(const SimpleTensor &src, SimpleTensor &dst, const Size2D const int src_channels = src.shape().z(); const int batches = src.shape().total_size_upper(3); const int dst_height = dst.shape().y(); - const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0; + const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().uniform().offset : 0; int dst_idx = 0; // Compute width and height of the convolved tensors @@ -105,7 +105,7 @@ void im2col_nhwc(const SimpleTensor &src, SimpleTensor &dst, const Size2D const int batches = src.shape().total_size_upper(3); const int dst_width = has_bias ? dst.shape().x() - 1 : dst.shape().x(); const int dst_height = dst.shape().y(); - const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0; + const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? 
src.quantization_info().uniform().offset : 0; // Compute width and height of the convolved tensors std::pair convolved_dims = scaled_dimensions(src_width, src_height, kernel_dims.width, kernel_dims.height, conv_info); diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp index 2f3348178c..182585abf9 100644 --- a/tests/validation/reference/QuantizationLayer.cpp +++ b/tests/validation/reference/QuantizationLayer.cpp @@ -34,24 +34,25 @@ namespace validation namespace reference { template -SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo quantization_info) +SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo &quantization_info) { // Create reference SimpleTensor dst{ src.shape(), DataType::QASYMM8, 1, quantization_info }; + const UniformQuantizationInfo qinfo = quantization_info.uniform(); for(int i = 0; i < src.num_elements(); ++i) { #ifdef __aarch64__ - dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_NEAREST_EVEN); + dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_NEAREST_EVEN); #else // __aarch64__ - dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_ZERO); + dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_ZERO); #endif // __aarch64__ } return dst; } -template SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo quantization_info); -template SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo quantization_info); +template SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo &quantization_info); +template SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo &quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/QuantizationLayer.h b/tests/validation/reference/QuantizationLayer.h index 2d136908af..462396f131 
100644 --- a/tests/validation/reference/QuantizationLayer.h +++ b/tests/validation/reference/QuantizationLayer.h @@ -36,7 +36,7 @@ namespace validation namespace reference { template -SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo quantization_info); +SimpleTensor quantization_layer(const SimpleTensor &src, const QuantizationInfo &quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 2f7bf2deb3..84f4fb83c1 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -180,10 +180,10 @@ SimpleTensor scale(const SimpleTensor &src, float scale_x, flo SamplingPolicy sampling_policy, bool ceil_policy_scale) { SimpleTensor dst; - if(src.quantization_info().scale != 0.f) + if(src.quantization_info().uniform().scale != 0.f) { SimpleTensor src_tmp = convert_from_asymmetric(src); - float constant_border_value_f = scvt_f32_qasymm8(constant_border_value, src.quantization_info().scale, src.quantization_info().offset); + float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info()); SimpleTensor dst_tmp = scale_core(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale); dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); } diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 25c8cd396d..cf351724f0 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -24,7 +24,6 @@ #ifndef __ARM_COMPUTE_TYPE_PRINTER_H__ #define __ARM_COMPUTE_TYPE_PRINTER_H__ -#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/core/Dimensions.h" #include "arm_compute/core/Error.h" @@ -316,15 +315,21 @@ inline 
std::string to_string(const GenerateProposalsInfo &proposals_info) /** Formatted output of the QuantizationInfo type. * - * @param[out] os Output stream. - * @param[in] quantization_info Type to output. + * @param[out] os Output stream. + * @param[in] qinfo Type to output. * * @return Modified output stream. */ -inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &quantization_info) +inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &qinfo) { - os << "Scale:" << quantization_info.scale << "~" - << "Offset:" << quantization_info.offset; + if(!qinfo.scale.empty()) + { + os << "Scale:" << qinfo.scale[0] << "~"; + } + if(!qinfo.empty()) + { + os << "Offset:" << qinfo.offset[0]; + } return os; } @@ -619,9 +624,15 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type) case DataType::U8: os << "U8"; break; + case DataType::QSYMM8: + os << "QSYMM8"; + break; case DataType::QASYMM8: os << "QASYMM8"; break; + case DataType::QSYMM8_PER_CHANNEL: + os << "QSYMM8_PER_CHANNEL"; + break; case DataType::S8: os << "S8"; break; diff --git a/utils/Utils.h b/utils/Utils.h index afd90a11a3..b4c23e849a 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -168,6 +168,8 @@ inline std::string get_typestring(DataType data_type) case DataType::QASYMM8: return no_endianness + "u" + support::cpp11::to_string(sizeof(uint8_t)); case DataType::S8: + case DataType::QSYMM8: + case DataType::QSYMM8_PER_CHANNEL: return no_endianness + "i" + support::cpp11::to_string(sizeof(int8_t)); case DataType::U16: return endianness + "u" + support::cpp11::to_string(sizeof(uint16_t)); -- cgit v1.2.1