author    Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>  2019-11-06 14:32:30 +0000
committer Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>  2019-11-06 15:28:34 +0000
commit    9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a (patch)
tree      e19c9f330b90d3161e089b281be26aac0e44e7be
parent    3b959603ab1e3dc7b7b70798f357b3fe6c0e47c8 (diff)
IVGCVSW-3841 Add support for per-axis quantization
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Ife7fa63b8839465e8f9f8626f34ca8c0f4d12788
 1.2/ArmnnDriver.hpp |  6
 ConversionUtils.hpp | 46
 Utils.cpp           | 28
 3 files changed, 61 insertions(+), 19 deletions(-)
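For context on what the commit adds: a TENSOR_QUANT8_SYMM_PER_CHANNEL operand carries one scale per slice along its channel dimension (channelDim) and a zero point fixed at 0, so real_value = quantized_value * scales[channel]. A minimal illustrative sketch of that dequantization rule (not part of this commit; the helper name is hypothetical):

#include <cstdint>
#include <vector>

// Dequantize one element of a per-channel symmetric quantized tensor.
// channelIndex selects the slice along the operand's channelDim.
float DequantizePerChannel(int8_t quantized,
                           const std::vector<float>& scales,
                           unsigned int channelIndex)
{
    // Symmetric quantization: the zero point is always 0, so there is no offset term
    return static_cast<float>(quantized) * scales[channelIndex];
}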
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index 7460f396..40116c8a 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -149,13 +149,13 @@ public:
const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
const android::sp<V1_2::IPreparedModelCallback>& cb)
{
- ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1()");
+ ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
if (!(preference == ExecutionPreference::LOW_POWER ||
preference == ExecutionPreference::FAST_SINGLE_ANSWER ||
preference == ExecutionPreference::SUSTAINED_SPEED))
{
- ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1: Invalid execution preference");
+ ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2: Invalid execution preference");
cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
return ErrorStatus::INVALID_ARGUMENT;
}
@@ -205,4 +205,4 @@ public:
};
} // namespace hal_1_2
-} // namespace armnn_driver
\ No newline at end of file
+} // namespace armnn_driver
diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp
index e4ac4a5a..1975434a 100644
--- a/ConversionUtils.hpp
+++ b/ConversionUtils.hpp
@@ -183,11 +183,12 @@ inline bool IsOperandTypeSupportedForTensors(V1_0::OperandType type)
inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type)
{
- return type == V1_2::OperandType::BOOL ||
- type == V1_2::OperandType::TENSOR_FLOAT16 ||
- type == V1_2::OperandType::TENSOR_FLOAT32 ||
- type == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
- type == V1_2::OperandType::TENSOR_QUANT16_SYMM ||
+ return type == V1_2::OperandType::BOOL ||
+ type == V1_2::OperandType::TENSOR_FLOAT16 ||
+ type == V1_2::OperandType::TENSOR_FLOAT32 ||
+ type == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
+ type == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+ type == V1_2::OperandType::TENSOR_QUANT16_SYMM ||
type == V1_2::OperandType::TENSOR_INT32;
}
@@ -384,16 +385,37 @@ Shape GetOperandShape(const V1_2::Operand& operand)
// we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the
// user (us, in this case) to ensure they match.
void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
- const armnn::TensorInfo& weightInfo, const armnn::TensorInfo& inputInfo)
+ const armnn::TensorInfo& weightInfo,
+ const armnn::TensorInfo& inputInfo)
{
- const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
- if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+ if (weightInfo.HasPerAxisQuantization())
{
- boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
- if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+ // NOTE: Bias scale is always set to 0 for per-axis quantization and
+ // it needs to be calculated: scale[i] = input_scale * weight_scale[i]
+ auto UpdateBiasScaleValue = [&inputInfo](float biasScale) -> float
{
- ALOGW("Bias quantization scale has been modified to match input*weights");
- biasInfo.SetQuantizationScale(expectedBiasScale);
+ return biasScale * inputInfo.GetQuantizationScale();
+ };
+
+ std::vector<float> biasScales(weightInfo.GetQuantizationScales());
+ std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue);
+
+ biasInfo.SetQuantizationScales(biasScales);
+ biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim());
+
+ ALOGV("Bias quantization params have been updated for per-axis quantization");
+ }
+ else
+ {
+ const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
+ if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+ {
+ boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
+ if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+ {
+ ALOGW("Bias quantization scale has been modified to match input * weights");
+ biasInfo.SetQuantizationScale(expectedBiasScale);
+ }
}
}
}
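As the NOTE in the hunk above says, per-axis bias scales arrive set to 0 and must be derived as scale[i] = input_scale * weight_scale[i]. A standalone sketch of that rule, mirroring the std::transform shape used by SanitizeBiasQuantizationScale (the helper name is hypothetical, not part of the driver):

#include <algorithm>
#include <vector>

std::vector<float> ExpectedBiasScales(float inputScale, const std::vector<float>& weightScales)
{
    std::vector<float> biasScales(weightScales);
    // scale[i] = input_scale * weight_scale[i], matching the UpdateBiasScaleValue lambda
    std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(),
                   [inputScale](float weightScale) { return weightScale * inputScale; });
    return biasScales;
}

For example, inputScale = 0.5f with weightScales = {0.1f, 0.2f} yields biasScales = {0.05f, 0.1f}.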
diff --git a/Utils.cpp b/Utils.cpp
index 43b65ee3..246d6415 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -52,6 +52,9 @@ void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void
case armnn::DataType::QuantisedAsymm8:
SwizzleAndroidNn4dTensorToArmNn<uint8_t>(tensor.GetShape(), input, output, mappings);
break;
+ case armnn::DataType::QuantizedSymm8PerAxis:
+ SwizzleAndroidNn4dTensorToArmNn<int8_t>(tensor.GetShape(), input, output, mappings);
+ break;
default:
ALOGW("Unknown armnn::DataType for swizzling");
assert(0);
@@ -109,8 +112,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand)
armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
{
- armnn::DataType type;
+ using namespace armnn;
+ DataType type;
switch (operand.type)
{
case V1_2::OperandType::TENSOR_FLOAT32:
@@ -119,6 +123,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
case V1_2::OperandType::TENSOR_FLOAT16:
type = armnn::DataType::Float16;
break;
+ case V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+ type = armnn::DataType::QuantizedSymm8PerAxis;
+ break;
case V1_2::OperandType::TENSOR_QUANT8_ASYMM:
type = armnn::DataType::QuantisedAsymm8;
break;
@@ -132,10 +139,23 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
throw UnsupportedOperand<V1_2::OperandType>(operand.type);
}
- armnn::TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+ TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+ if (type == DataType::QuantizedSymm8PerAxis)
+ {
+ // ExtraParams is expected to be of type channelQuant
+ BOOST_ASSERT(operand.extraParams.getDiscriminator() ==
+ V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);
- ret.SetQuantizationScale(operand.scale);
- ret.SetQuantizationOffset(operand.zeroPoint);
+ auto perAxisQuantParams = operand.extraParams.channelQuant();
+
+ ret.SetQuantizationScales(perAxisQuantParams.scales);
+ ret.SetQuantizationDim(MakeOptional<unsigned int>(perAxisQuantParams.channelDim));
+ }
+ else
+ {
+ ret.SetQuantizationScale(operand.scale);
+ ret.SetQuantizationOffset(operand.zeroPoint);
+ }
return ret;
}
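Putting the Utils.cpp hunks together: a per-channel operand now becomes a TensorInfo carrying a vector of scales plus a quantization dimension, rather than a single scale/offset pair. A hedged usage sketch of that TensorInfo API as the patch uses it (function name, dimensions, and scale values are invented for illustration):

#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

armnn::TensorInfo MakePerAxisWeightInfo()
{
    // Two output channels quantized along dimension 0, so two scales are required
    const unsigned int dims[] = { 2, 3, 3, 4 };
    armnn::TensorInfo info(4, dims, armnn::DataType::QuantizedSymm8PerAxis);

    info.SetQuantizationScales({ 0.1f, 0.2f });
    info.SetQuantizationDim(armnn::MakeOptional<unsigned int>(0));
    return info;
}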