From f01201abec0a102f6e7a517971f83fef1eaffd50 Mon Sep 17 00:00:00 2001 From: Giuseppe Rossini Date: Wed, 6 Nov 2019 14:57:49 +0000 Subject: COMPMID-2305: NEDepthwiseConvolution 3x3: support for QUANT8_PER_CHANNEL_SYMM Change-Id: I9a917cff6a089ce6ae16fb4e6066a4194e2e9487 Signed-off-by: Giuseppe Rossini Reviewed-on: https://review.mlplatform.org/c/2241 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Reviewed-by: Pablo Marquez --- .../NEON/kernels/convolution/common/qsymm8.cpp | 185 +++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/core/NEON/kernels/convolution/common/qsymm8.cpp (limited to 'src/core/NEON/kernels/convolution/common/qsymm8.cpp') diff --git a/src/core/NEON/kernels/convolution/common/qsymm8.cpp b/src/core/NEON/kernels/convolution/common/qsymm8.cpp new file mode 100644 index 0000000000..e50263acaa --- /dev/null +++ b/src/core/NEON/kernels/convolution/common/qsymm8.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "qsymm8.hpp" + +namespace qsymm8 { +#if(__ANDROID__ || BARE_METAL) +template T round(T val) { return ::round(val); } +template T exp2(T val) { return ::exp2(val); } +template T log2(T val) { return ::log2(val); } +#else /* (__ANDROID__ || BARE_METAL) */ +template T round(T val) { return std::round(val); } +template T exp2(T val) { return std::exp2(val); } +template T log2(T val) { return std::log2(val); } +#endif /* (__ANDROID__ || BARE_METAL) */ + +// Symmetric quantization +int8_t QSymm8Params::quantize(float value) const +{ + const float transformed = value / scale; + return static_cast(round(std::max(-128.0f, std::min(127.0f, transformed)))); +} + +float QSymm8Params::dequantize(const int8_t value) const +{ + return scale * (static_cast(value)); +} + +QSymm8RescaleParams QSymm8RescaleParams::make_rescale_params( + const QSymm8Params& weight_quant, + const QSymm8Params& input_quant, + const QSymm8Params& output_quant +) +{ + // Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc + const float rescale = weight_quant.scale * input_quant.scale / output_quant.scale; + const float shiftf = round(log2(0.5f / rescale)); + const float multf = exp2(31.0f + shiftf)*rescale; + + int64_t shift = static_cast(shiftf); + int64_t mult = static_cast(multf); + + if (mult == (1ll << 31)) + { + mult /= 2; + shift--; + } + + assert(shift >= 0); + assert(mult <= std::numeric_limits::max()); + + return QSymm8RescaleParams( + static_cast(shift), + static_cast(mult), + rescale + ); +} + +QSymm8RescaleParams::QSymm8RescaleParams(int32_t shift, int32_t multi, float rescale) + : shift(shift), multiplier(multi), rescale(rescale) +{ +} + +// Symmetric per-channel quantization +int8_t QSymm8PerChannelParams::quantize(float value, float scale) const +{ + const float transformed = value / scale; + return static_cast(round(std::max(-128.0f, std::min(127.0f, transformed)))); +} + +float QSymm8PerChannelParams::dequantize(const int8_t value, float scale) const +{ + return scale * (static_cast(value)); +} + +QSymm8PerChannelRescaleParams QSymm8PerChannelRescaleParams::make_rescale_params( + const QSymm8PerChannelParams& weight_quant, + const QSymm8PerChannelParams& input_quant, + const QSymm8PerChannelParams& output_quant +) +{ + std::vector shifts; + std::vector mults; + std::vector rescales; + + for(size_t s = 0; s< input_quant.scales.size(); s++) + { + // Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc + const float rescale = weight_quant.scales[s] * input_quant.scales[s] / output_quant.scales[s]; + const float shiftf = round(log2(0.5f / rescale)); + const float multf = exp2(31.0f + shiftf)*rescale; + + int64_t shift = static_cast(shiftf); + int64_t mult = static_cast(multf); + + if (mult == (1ll << 31)) + { + mult /= 2; + shift--; + } + + assert(shift >= 0); + assert(mult <= std::numeric_limits::max()); + + shifts.push_back(static_cast(shift)); + mults.push_back(static_cast(mult)); + rescales.push_back(rescale); + } + + return QSymm8PerChannelRescaleParams(shifts, mults, rescales); + +} + +QSymm8PerChannelRescaleParams QSymm8PerChannelRescaleParams::make_rescale_params( + const QSymm8PerChannelParams& weight_quant, + const qasymm8::QAsymm8Params& input_quant, + const qasymm8::QAsymm8Params& output_quant +) +{ + std::vector shifts; + std::vector mults; + std::vector rescales; + + for(size_t s = 0; s< weight_quant.scales.size(); s++) + { + // Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc + const float rescale = weight_quant.scales[s] * input_quant.scale / output_quant.scale; + const float shiftf = round(log2(0.5f / rescale)); + const float multf = exp2(31.0f + shiftf)*rescale; + + int64_t shift = static_cast(shiftf); + int64_t mult = static_cast(multf); + + if (mult == (1ll << 31)) + { + mult /= 2; + shift--; + } + + assert(shift >= 0); + assert(mult <= std::numeric_limits::max()); + + shifts.push_back(static_cast(shift)); + mults.push_back(static_cast(mult)); + rescales.push_back(rescale); + } + + return QSymm8PerChannelRescaleParams(shifts, mults, rescales); + +} + +QSymm8PerChannelRescaleParams::QSymm8PerChannelRescaleParams(std::vector& shifts, std::vector& multipliers, std::vector& rescales) + : shifts(shifts), multipliers(multipliers), rescales(rescales) +{ +} + + +} // namespace qasymm8 -- cgit v1.2.1