From 9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a Mon Sep 17 00:00:00 2001
From: Aron Virginas-Tar
Date: Wed, 6 Nov 2019 14:32:30 +0000
Subject: IVGCVSW-3841 Add support for per-axis quantization

Signed-off-by: Aron Virginas-Tar
Change-Id: Ife7fa63b8839465e8f9f8626f34ca8c0f4d12788
---
 1.2/ArmnnDriver.hpp |  6 +++---
 ConversionUtils.hpp | 46 ++++++++++++++++++++++++++++++++++------------
 Utils.cpp           | 28 ++++++++++++++++++++++++----
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index 7460f396..40116c8a 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -149,13 +149,13 @@ public:
                                          const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
                                          const HidlToken&,
                                          const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
-        ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1()");
+        ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
         if (!(preference == ExecutionPreference::LOW_POWER ||
               preference == ExecutionPreference::FAST_SINGLE_ANSWER ||
               preference == ExecutionPreference::SUSTAINED_SPEED))
         {
-            ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1: Invalid execution preference");
+            ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2: Invalid execution preference");
             cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
             return ErrorStatus::INVALID_ARGUMENT;
         }
@@ -205,4 +205,4 @@ public:
 };

 } // namespace hal_1_2
-} // namespace armnn_driver
\ No newline at end of file
+} // namespace armnn_driver

diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp
index e4ac4a5a..1975434a 100644
--- a/ConversionUtils.hpp
+++ b/ConversionUtils.hpp
@@ -183,11 +183,12 @@ inline bool IsOperandTypeSupportedForTensors(V1_0::OperandType type)

 inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type)
 {
-    return type == V1_2::OperandType::BOOL ||
-           type == V1_2::OperandType::TENSOR_FLOAT16 ||
-           type == V1_2::OperandType::TENSOR_FLOAT32 ||
-           type == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
-           type == V1_2::OperandType::TENSOR_QUANT16_SYMM ||
+    return type == V1_2::OperandType::BOOL                           ||
+           type == V1_2::OperandType::TENSOR_FLOAT16                 ||
+           type == V1_2::OperandType::TENSOR_FLOAT32                 ||
+           type == V1_2::OperandType::TENSOR_QUANT8_ASYMM            ||
+           type == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+           type == V1_2::OperandType::TENSOR_QUANT16_SYMM            ||
            type == V1_2::OperandType::TENSOR_INT32;
 }

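For background: a TENSOR_QUANT8_SYMM_PER_CHANNEL operand carries one scale per slice along its quantization dimension instead of a single scale, and its zero point is fixed at 0. A minimal illustrative sketch of the dequantization rule, not part of the patch (the helper name is hypothetical):

    #include <cstdint>
    #include <vector>

    // Dequantize one value belonging to channel `channel` of a per-axis
    // symmetric quantized tensor: real = scale[channel] * quantized
    // (the zero point is always 0 for symmetric per-channel types).
    float DequantizePerChannel(int8_t quantized,
                               unsigned int channel,
                               const std::vector<float>& channelScales)
    {
        return channelScales[channel] * static_cast<float>(quantized);
    }
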
@@ -384,16 +385,37 @@ Shape GetOperandShape(const V1_2::Operand& operand)
 // we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the
 // user (us, in this case) to ensure they match.
 void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
-                                   const armnn::TensorInfo& weightInfo, const armnn::TensorInfo& inputInfo)
+                                   const armnn::TensorInfo& weightInfo,
+                                   const armnn::TensorInfo& inputInfo)
 {
-    const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
-    if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+    if (weightInfo.HasPerAxisQuantization())
     {
-        boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
-        if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+        // NOTE: Bias scale is always set to 0 for per-axis quantization and
+        // it needs to be calculated: scale[i] = input_scale * weight_scale[i]
+        auto UpdateBiasScaleValue = [&inputInfo](float biasScale) -> float
         {
-            ALOGW("Bias quantization scale has been modified to match input*weights");
-            biasInfo.SetQuantizationScale(expectedBiasScale);
+            return biasScale * inputInfo.GetQuantizationScale();
+        };
+
+        std::vector<float> biasScales(weightInfo.GetQuantizationScales());
+        std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue);
+
+        biasInfo.SetQuantizationScales(biasScales);
+        biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim());
+
+        ALOGV("Bias quantization params have been updated for per-axis quantization");
+    }
+    else
+    {
+        const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
+        if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+        {
+            boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
+            if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+            {
+                ALOGW("Bias quantization scale has been modified to match input * weights");
+                biasInfo.SetQuantizationScale(expectedBiasScale);
+            }
         }
     }
 }
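The per-axis branch of SanitizeBiasQuantizationScale above derives each bias scale as scale[i] = input_scale * weight_scale[i], because the NN HAL leaves the bias scale at 0 when the weights are quantized per axis. A standalone sketch of the same computation, assuming nothing beyond the standard library (the function name is ours):

    #include <algorithm>
    #include <vector>

    // Derive per-channel bias scales from the input scale and the
    // per-channel weight scales: biasScales[i] = inputScale * weightScales[i].
    std::vector<float> ComputeBiasScales(float inputScale,
                                         const std::vector<float>& weightScales)
    {
        std::vector<float> biasScales(weightScales.size());
        std::transform(weightScales.begin(), weightScales.end(), biasScales.begin(),
                       [inputScale](float weightScale) { return inputScale * weightScale; });
        return biasScales;
    }

For example, inputScale = 0.5f with weightScales = {0.1f, 0.2f} yields bias scales {0.05f, 0.1f}.
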
diff --git a/Utils.cpp b/Utils.cpp
index 43b65ee3..246d6415 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -52,6 +52,9 @@ void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void
         case armnn::DataType::QuantisedAsymm8:
             SwizzleAndroidNn4dTensorToArmNn<uint8_t>(tensor.GetShape(), input, output, mappings);
             break;
+        case armnn::DataType::QuantizedSymm8PerAxis:
+            SwizzleAndroidNn4dTensorToArmNn<int8_t>(tensor.GetShape(), input, output, mappings);
+            break;
         default:
             ALOGW("Unknown armnn::DataType for swizzling");
             assert(0);
@@ -109,8 +112,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand)

 armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
 {
-    armnn::DataType type;
+    using namespace armnn;
+    DataType type;

     switch (operand.type)
     {
         case V1_2::OperandType::TENSOR_FLOAT32:
@@ -119,6 +123,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
         case V1_2::OperandType::TENSOR_FLOAT16:
             type = armnn::DataType::Float16;
             break;
+        case V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+            type = armnn::DataType::QuantizedSymm8PerAxis;
+            break;
         case V1_2::OperandType::TENSOR_QUANT8_ASYMM:
             type = armnn::DataType::QuantisedAsymm8;
             break;
@@ -132,10 +139,23 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
             throw UnsupportedOperand<V1_2::OperandType>(operand.type);
     }

-    armnn::TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+    TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+    if (type == DataType::QuantizedSymm8PerAxis)
+    {
+        // ExtraParams is expected to be of type channelQuant
+        BOOST_ASSERT(operand.extraParams.getDiscriminator() ==
+                     V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);

-    ret.SetQuantizationScale(operand.scale);
-    ret.SetQuantizationOffset(operand.zeroPoint);
+        auto perAxisQuantParams = operand.extraParams.channelQuant();
+
+        ret.SetQuantizationScales(perAxisQuantParams.scales);
+        ret.SetQuantizationDim(MakeOptional<unsigned int>(perAxisQuantParams.channelDim));
+    }
+    else
+    {
+        ret.SetQuantizationScale(operand.scale);
+        ret.SetQuantizationOffset(operand.zeroPoint);
+    }

     return ret;
 }
--
cgit v1.2.1
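To show how the per-axis branch of GetTensorInfoForOperand above comes together, here is an illustrative sketch that builds an equivalent per-axis armnn::TensorInfo by hand, assuming the ArmNN API calls used in the patch; the function name, shape, scales, and axis are made-up values:

    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>

    armnn::TensorInfo MakePerAxisTensorInfo()
    {
        // A hypothetical 2x2 weight tensor quantized along dimension 0.
        const unsigned int dimensions[] = { 2, 2 };
        armnn::TensorInfo info(2, dimensions, armnn::DataType::QuantizedSymm8PerAxis);

        info.SetQuantizationScales({ 0.1f, 0.2f });                    // one scale per channel
        info.SetQuantizationDim(armnn::MakeOptional<unsigned int>(0)); // quantization axis
        return info;
    }

Note how this mirrors the patch: per-axis operands get a vector of scales plus a quantization dimension, while the single-scale SetQuantizationScale/SetQuantizationOffset path is reserved for conventional per-tensor quantization.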