diff options
author | Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> | 2019-11-06 14:32:30 +0000 |
---|---|---|
committer | Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> | 2019-11-06 15:28:34 +0000 |
commit | 9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a (patch) | |
tree | e19c9f330b90d3161e089b281be26aac0e44e7be | |
parent | 3b959603ab1e3dc7b7b70798f357b3fe6c0e47c8 (diff) | |
download | android-nn-driver-9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a.tar.gz |
IVGCVSW-3841 Add support for per-axis quantization
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Ife7fa63b8839465e8f9f8626f34ca8c0f4d12788
-rw-r--r-- | 1.2/ArmnnDriver.hpp | 6 | ||||
-rw-r--r-- | ConversionUtils.hpp | 46 | ||||
-rw-r--r-- | Utils.cpp | 28 |
3 files changed, 61 insertions, 19 deletions
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp index 7460f396..40116c8a 100644 --- a/1.2/ArmnnDriver.hpp +++ b/1.2/ArmnnDriver.hpp @@ -149,13 +149,13 @@ public: const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&, const android::sp<V1_2::IPreparedModelCallback>& cb) { - ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1()"); + ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()"); if (!(preference == ExecutionPreference::LOW_POWER || preference == ExecutionPreference::FAST_SINGLE_ANSWER || preference == ExecutionPreference::SUSTAINED_SPEED)) { - ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1: Invalid execution preference"); + ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2: Invalid execution preference"); cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); return ErrorStatus::INVALID_ARGUMENT; } @@ -205,4 +205,4 @@ public: }; } // namespace hal_1_2 -} // namespace armnn_driver
\ No newline at end of file +} // namespace armnn_driver diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp index e4ac4a5a..1975434a 100644 --- a/ConversionUtils.hpp +++ b/ConversionUtils.hpp @@ -183,11 +183,12 @@ inline bool IsOperandTypeSupportedForTensors(V1_0::OperandType type) inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type) { - return type == V1_2::OperandType::BOOL || - type == V1_2::OperandType::TENSOR_FLOAT16 || - type == V1_2::OperandType::TENSOR_FLOAT32 || - type == V1_2::OperandType::TENSOR_QUANT8_ASYMM || - type == V1_2::OperandType::TENSOR_QUANT16_SYMM || + return type == V1_2::OperandType::BOOL || + type == V1_2::OperandType::TENSOR_FLOAT16 || + type == V1_2::OperandType::TENSOR_FLOAT32 || + type == V1_2::OperandType::TENSOR_QUANT8_ASYMM || + type == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL || + type == V1_2::OperandType::TENSOR_QUANT16_SYMM || type == V1_2::OperandType::TENSOR_INT32; } @@ -384,16 +385,37 @@ Shape GetOperandShape(const V1_2::Operand& operand) // we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the // user (us, in this case) to ensure they match. void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo, - const armnn::TensorInfo& weightInfo, const armnn::TensorInfo& inputInfo) + const armnn::TensorInfo& weightInfo, + const armnn::TensorInfo& inputInfo) { - const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale(); - if (biasInfo.GetQuantizationScale() != expectedBiasScale) + if (weightInfo.HasPerAxisQuantization()) { - boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f)); - if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale)) + // NOTE: Bias scale is always set to 0 for per-axis quantization and + // it needs to be calculated: scale[i] = input_scale * weight_scale[i] + auto UpdateBiasScaleValue = [&inputInfo](float biasScale) -> float { - ALOGW("Bias quantization scale has been modified to match input*weights"); - biasInfo.SetQuantizationScale(expectedBiasScale); + return biasScale * inputInfo.GetQuantizationScale(); + }; + + std::vector<float> biasScales(weightInfo.GetQuantizationScales()); + std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue); + + biasInfo.SetQuantizationScales(biasScales); + biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim()); + + ALOGV("Bias quantization params have been updated for per-axis quantization"); + } + else + { + const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale(); + if (biasInfo.GetQuantizationScale() != expectedBiasScale) + { + boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f)); + if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale)) + { + ALOGW("Bias quantization scale has been modified to match input * weights"); + biasInfo.SetQuantizationScale(expectedBiasScale); + } } } } @@ -52,6 +52,9 @@ void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void case armnn::DataType::QuantisedAsymm8: SwizzleAndroidNn4dTensorToArmNn<uint8_t>(tensor.GetShape(), input, output, mappings); break; + case armnn::DataType::QuantizedSymm8PerAxis: + SwizzleAndroidNn4dTensorToArmNn<int8_t>(tensor.GetShape(), input, output, mappings); + break; default: ALOGW("Unknown armnn::DataType for swizzling"); assert(0); @@ -109,8 +112,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand) armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand) { - armnn::DataType type; + using namespace armnn; + DataType type; switch (operand.type) { case V1_2::OperandType::TENSOR_FLOAT32: @@ -119,6 +123,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand) case V1_2::OperandType::TENSOR_FLOAT16: type = armnn::DataType::Float16; break; + case V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + type = armnn::DataType::QuantizedSymm8PerAxis; + break; case V1_2::OperandType::TENSOR_QUANT8_ASYMM: type = armnn::DataType::QuantisedAsymm8; break; @@ -132,10 +139,23 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand) throw UnsupportedOperand<V1_2::OperandType>(operand.type); } - armnn::TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type); + TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type); + if (type == DataType::QuantizedSymm8PerAxis) + { + // ExtraParams is expected to be of type channelQuant + BOOST_ASSERT(operand.extraParams.getDiscriminator() == + V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant); - ret.SetQuantizationScale(operand.scale); - ret.SetQuantizationOffset(operand.zeroPoint); + auto perAxisQuantParams = operand.extraParams.channelQuant(); + + ret.SetQuantizationScales(perAxisQuantParams.scales); + ret.SetQuantizationDim(MakeOptional<unsigned int>(perAxisQuantParams.channelDim)); + } + else + { + ret.SetQuantizationScale(operand.scale); + ret.SetQuantizationOffset(operand.zeroPoint); + } return ret; } |