From 9f0693b41a33d4d17ef016d8a5490cc65a8cfb8a Mon Sep 17 00:00:00 2001
From: Aron Virginas-Tar
Date: Wed, 6 Nov 2019 14:32:30 +0000
Subject: IVGCVSW-3841 Add support for per-axis quantization

Signed-off-by: Aron Virginas-Tar
Change-Id: Ife7fa63b8839465e8f9f8626f34ca8c0f4d12788
---
 1.2/ArmnnDriver.hpp |  6 +++---
 ConversionUtils.hpp | 46 ++++++++++++++++++++++++++++++++++------------
 Utils.cpp           | 28 ++++++++++++++++++++++++----
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index 7460f396..40116c8a 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -149,13 +149,13 @@ public:
                                          const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
                                          const HidlToken&,
                                          const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
-        ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1()");
+        ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
         if (!(preference == ExecutionPreference::LOW_POWER ||
               preference == ExecutionPreference::FAST_SINGLE_ANSWER ||
               preference == ExecutionPreference::SUSTAINED_SPEED))
         {
-            ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_1: Invalid execution preference");
+            ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2: Invalid execution preference");
             cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
             return ErrorStatus::INVALID_ARGUMENT;
         }
@@ -205,4 +205,4 @@ public:
 };

 } // namespace hal_1_2
-} // namespace armnn_driver
\ No newline at end of file
+} // namespace armnn_driver

diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp
index e4ac4a5a..1975434a 100644
--- a/ConversionUtils.hpp
+++ b/ConversionUtils.hpp
@@ -183,11 +183,12 @@ inline bool IsOperandTypeSupportedForTensors(V1_0::OperandType type)

 inline bool IsOperandTypeSupportedForTensors(V1_2::OperandType type)
 {
-    return type == V1_2::OperandType::BOOL ||
-           type == V1_2::OperandType::TENSOR_FLOAT16 ||
-           type == V1_2::OperandType::TENSOR_FLOAT32 ||
-           type == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
-           type == V1_2::OperandType::TENSOR_QUANT16_SYMM ||
+    return type == V1_2::OperandType::BOOL                           ||
+           type == V1_2::OperandType::TENSOR_FLOAT16                 ||
+           type == V1_2::OperandType::TENSOR_FLOAT32                 ||
+           type == V1_2::OperandType::TENSOR_QUANT8_ASYMM            ||
+           type == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+           type == V1_2::OperandType::TENSOR_QUANT16_SYMM            ||
            type == V1_2::OperandType::TENSOR_INT32;
 }

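For background: a TENSOR_QUANT8_SYMM_PER_CHANNEL operand carries one scale per slice along its quantization dimension instead of a single scale, and its zero point is fixed at 0. A minimal illustrative sketch of the dequantization rule, not part of the patch (the helper name is hypothetical):

    #include <cstdint>
    #include <vector>

    // Dequantize one value belonging to channel `channel` of a per-axis
    // symmetric quantized tensor: real = scale[channel] * quantized
    // (the zero point is always 0 for symmetric per-channel types).
    float DequantizePerChannel(int8_t quantized,
                               unsigned int channel,
                               const std::vector<float>& channelScales)
    {
        return channelScales[channel] * static_cast<float>(quantized);
    }
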
@@ -384,16 +385,37 @@ Shape GetOperandShape(const V1_2::Operand& operand)
 // we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the
 // user (us, in this case) to ensure they match.
 void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
-                                   const armnn::TensorInfo& weightInfo, const armnn::TensorInfo& inputInfo)
+                                   const armnn::TensorInfo& weightInfo,
+                                   const armnn::TensorInfo& inputInfo)
 {
-    const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
-    if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+    if (weightInfo.HasPerAxisQuantization())
     {
-        boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
-        if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+        // NOTE: Bias scale is always set to 0 for per-axis quantization and
+        // it needs to be calculated: scale[i] = input_scale * weight_scale[i]
+        auto UpdateBiasScaleValue = [&inputInfo](float biasScale) -> float
         {
-            ALOGW("Bias quantization scale has been modified to match input*weights");
-            biasInfo.SetQuantizationScale(expectedBiasScale);
+            return biasScale * inputInfo.GetQuantizationScale();
+        };
+
+        std::vector<float> biasScales(weightInfo.GetQuantizationScales());
+        std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue);
+
+        biasInfo.SetQuantizationScales(biasScales);
+        biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim());
+
+        ALOGV("Bias quantization params have been updated for per-axis quantization");
+    }
+    else
+    {
+        const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
+        if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+        {
+            boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
+            if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+            {
+                ALOGW("Bias quantization scale has been modified to match input * weights");
+                biasInfo.SetQuantizationScale(expectedBiasScale);
+            }
         }
     }
 }
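The per-axis branch of SanitizeBiasQuantizationScale above derives each bias scale as scale[i] = input_scale * weight_scale[i], because the NN HAL leaves the bias scale at 0 when the weights are quantized per axis. A standalone sketch of the same computation, assuming nothing beyond the standard library (the function name is ours):

    #include <algorithm>
    #include <vector>

    // Derive per-channel bias scales from the input scale and the
    // per-channel weight scales: biasScales[i] = inputScale * weightScales[i].
    std::vector<float> ComputeBiasScales(float inputScale,
                                         const std::vector<float>& weightScales)
    {
        std::vector<float> biasScales(weightScales.size());
        std::transform(weightScales.begin(), weightScales.end(), biasScales.begin(),
                       [inputScale](float weightScale) { return inputScale * weightScale; });
        return biasScales;
    }

For example, inputScale = 0.5f with weightScales = {0.1f, 0.2f} yields bias scales {0.05f, 0.1f}.
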
diff --git a/Utils.cpp b/Utils.cpp
index 43b65ee3..246d6415 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -52,6 +52,9 @@ void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void
         case armnn::DataType::QuantisedAsymm8:
             SwizzleAndroidNn4dTensorToArmNn<uint8_t>(tensor.GetShape(), input, output, mappings);
             break;
+        case armnn::DataType::QuantizedSymm8PerAxis:
+            SwizzleAndroidNn4dTensorToArmNn<int8_t>(tensor.GetShape(), input, output, mappings);
+            break;
         default:
             ALOGW("Unknown armnn::DataType for swizzling");
             assert(0);
@@ -109,8 +112,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand)

 armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
 {
-    armnn::DataType type;
+    using namespace armnn;
+    DataType type;

     switch (operand.type)
     {
         case V1_2::OperandType::TENSOR_FLOAT32:
@@ -119,6 +123,9 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
         case V1_2::OperandType::TENSOR_FLOAT16:
             type = armnn::DataType::Float16;
             break;
+        case V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+            type = armnn::DataType::QuantizedSymm8PerAxis;
+            break;
         case V1_2::OperandType::TENSOR_QUANT8_ASYMM:
             type = armnn::DataType::QuantisedAsymm8;
             break;
@@ -132,10 +139,23 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
             throw UnsupportedOperand<V1_2::OperandType>(operand.type);
     }

-    armnn::TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+    TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+    if (type == DataType::QuantizedSymm8PerAxis)
+    {
+        // ExtraParams is expected to be of type channelQuant
+        BOOST_ASSERT(operand.extraParams.getDiscriminator() ==
+                     V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);

-    ret.SetQuantizationScale(operand.scale);
-    ret.SetQuantizationOffset(operand.zeroPoint);
+        auto perAxisQuantParams = operand.extraParams.channelQuant();
+
+        ret.SetQuantizationScales(perAxisQuantParams.scales);
+        ret.SetQuantizationDim(MakeOptional<unsigned int>(perAxisQuantParams.channelDim));
+    }
+    else
+    {
+        ret.SetQuantizationScale(operand.scale);
+        ret.SetQuantizationOffset(operand.zeroPoint);
+    }

     return ret;
 }
--
cgit v1.2.1
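To show how the per-axis branch of GetTensorInfoForOperand above comes together, here is an illustrative sketch that builds an equivalent per-axis armnn::TensorInfo by hand, assuming the ArmNN API calls used in the patch; the function name, shape, scales, and axis are made-up values:

    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>

    armnn::TensorInfo MakePerAxisTensorInfo()
    {
        // A hypothetical 2x2 weight tensor quantized along dimension 0.
        const unsigned int dimensions[] = { 2, 2 };
        armnn::TensorInfo info(2, dimensions, armnn::DataType::QuantizedSymm8PerAxis);

        info.SetQuantizationScales({ 0.1f, 0.2f });                    // one scale per channel
        info.SetQuantizationDim(armnn::MakeOptional<unsigned int>(0)); // quantization axis
        return info;
    }

Note how this mirrors the patch: per-axis operands get a vector of scales plus a quantization dimension, while the single-scale SetQuantizationScale/SetQuantizationOffset path is reserved for conventional per-tensor quantization.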