From fd2710651ada27fc82f28c07fb1e09effc3bda2d Mon Sep 17 00:00:00 2001
From: Finn Williams
Date: Wed, 4 Dec 2019 14:27:27 +0000
Subject: IVGCVSW-4211 Add Signed 8 bit Quantisation support into the Reference backend

!android-nn-driver:2435

Signed-off-by: Finn Williams
Change-Id: I10ecd4a8937725953396805f33a3562a5384c4d4
---
 include/armnn/Types.hpp                            |   2 +-
 include/armnn/TypesUtils.hpp                       |   4 +-
 src/armnn/CompatibleTypes.hpp                      |   2 +-
 src/armnn/NetworkQuantizationScheme.hpp            |  33 ++
 src/armnn/NetworkQuantizer.cpp                     |   3 +
 src/armnn/ResolveType.hpp                          |   6 +
 src/armnn/test/QuantizerTest.cpp                   | 488 +++++++++++++++------
 src/backends/aclCommon/ArmComputeTensorUtils.cpp   |   2 +-
 src/backends/backendsCommon/WorkloadData.cpp       |   1 +
 .../test/layerTests/DequantizeTestImpl.cpp         |  14 +
 .../test/layerTests/DequantizeTestImpl.hpp         |   8 +
 .../test/layerTests/QuantizeTestImpl.cpp           |   7 +
 .../test/layerTests/QuantizeTestImpl.hpp           |   4 +
 src/backends/reference/RefLayerSupport.cpp         |   8 +-
 src/backends/reference/test/RefLayerTests.cpp      |   3 +
 src/backends/reference/workloads/BaseIterator.hpp  |  43 ++
 src/backends/reference/workloads/Decoders.hpp      |   7 +
 src/backends/reference/workloads/Encoders.hpp      |   7 +
 .../reference/workloads/RefQuantizeWorkload.cpp    |   5 +
 19 files changed, 517 insertions(+), 130 deletions(-)

diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index df5e6a7199..dcc8c9e52c 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -37,7 +37,7 @@ enum class DataType
     Boolean = 4,
     QuantisedSymm16 = 5,
     QuantizedSymm8PerAxis = 6,
-    QuantisedSymm8 = 7
+    QSymmS8 = 7
 };

 enum class DataLayout
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index c8f612f08d..4394d9ddd9 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -116,6 +116,7 @@ constexpr unsigned int GetDataTypeSize(DataType dataType)
         case DataType::Float32:
         case DataType::Signed32:              return 4U;
         case DataType::QuantisedAsymm8:       return 1U;
+        case DataType::QSymmS8:               return 1U;
         case DataType::QuantizedSymm8PerAxis: return 1U;
         case DataType::QuantisedSymm16:       return 2U;
         case DataType::Boolean:               return 1U;
@@ -163,6 +164,7 @@ constexpr const char* GetDataTypeName(DataType dataType)
         case DataType::Float16:               return "Float16";
         case DataType::Float32:               return "Float32";
         case DataType::QuantisedAsymm8:       return "QAsymm8";
+        case DataType::QSymmS8:               return "QSymm8";
         case DataType::QuantizedSymm8PerAxis: return "QSymm8PerAxis";
         case DataType::QuantisedSymm16:       return "QSymm16";
         case DataType::Signed32:              return "Signed32";
@@ -198,7 +200,7 @@ constexpr bool IsQuantizedType()
 constexpr bool IsQuantizedType(DataType dataType)
 {
     return dataType == DataType::QuantisedAsymm8 ||
-           dataType == DataType::QuantisedSymm8  ||
+           dataType == DataType::QSymmS8         ||
            dataType == DataType::QuantisedSymm16 ||
            dataType == DataType::QuantizedSymm8PerAxis;
 }
diff --git a/src/armnn/CompatibleTypes.hpp b/src/armnn/CompatibleTypes.hpp
index 06aa064f7b..fd33f6c37a 100644
--- a/src/armnn/CompatibleTypes.hpp
+++ b/src/armnn/CompatibleTypes.hpp
@@ -38,7 +38,7 @@ inline bool CompatibleTypes<uint8_t>(DataType dataType)
 template<>
 inline bool CompatibleTypes<int8_t>(DataType dataType)
 {
-    return dataType == DataType::QuantisedSymm8 || dataType == DataType::QuantizedSymm8PerAxis;
+    return dataType == DataType::QSymmS8 || dataType == DataType::QuantizedSymm8PerAxis;
 }

 template<>
diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp
index 0effa1fd64..ea3c29102b 100644
--- a/src/armnn/NetworkQuantizationScheme.hpp
+++ b/src/armnn/NetworkQuantizationScheme.hpp
@@ -61,6 +61,34 @@ struct QAsymm8QuantizationScheme : IQuantizationScheme
     DataType GetDataType() const override { return DataType::QuantisedAsymm8; }
 };

+struct QSymmS8QuantizationScheme : IQuantizationScheme
+{
+    OffsetScalePair ComputeScheme(double min, double max) const override
+    {
+        if (min > max)
+        {
+            throw InvalidArgumentException("min > max will result in invalid quantization.");
+        }
+
+        // To avoid dividing by zero when quantizing a zero filled tensor
+        if (min == 0.0 && max == 0.0)
+        {
+            max = 1.0;
+        }
+
+        double highest = (1 << (NumBits() - 1)) - 1; // (numbits - 1) accounts for the sign bit
+
+        double extent = std::max(std::abs(min), std::abs(max));
+        double scale = extent / highest;
+
+        return std::make_pair(static_cast<float>(scale), 0);
+    }
+
+    int NumBits() const override { return 8; }
+
+    DataType GetDataType() const override { return DataType::QSymmS8; }
+};
+
 struct QSymm16QuantizationScheme : IQuantizationScheme
 {
     OffsetScalePair ComputeScheme(double min, double max) const override
@@ -81,7 +109,12 @@ struct QSymm16QuantizationScheme : IQuantizationScheme
         double extent = std::max(std::abs(min), std::abs(max));
         double scale = extent / highest;

+        if (scale == 0.000457777642)
+        {
+            return std::make_pair(static_cast<float>(scale), 0);
+        }
         return std::make_pair(static_cast<float>(scale), 0);
+
     }

     int NumBits() const override { return 16; }
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index dd2f32414a..f6d625fda3 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -163,6 +163,9 @@ INetworkPtr NetworkQuantizer::ExportNetwork()
         case DataType::QuantisedAsymm8:
             quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>();
             break;
+        case DataType::QSymmS8:
+            quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
+            break;
         case DataType::QuantisedSymm16:
             quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
             break;
diff --git a/src/armnn/ResolveType.hpp b/src/armnn/ResolveType.hpp
index 55269f4620..ba3d0fca5c 100644
--- a/src/armnn/ResolveType.hpp
+++ b/src/armnn/ResolveType.hpp
@@ -32,6 +32,12 @@ struct ResolveTypeImpl<DataType::QuantisedAsymm8>
     using Type = uint8_t;
 };

+template<>
+struct ResolveTypeImpl<DataType::QSymmS8>
+{
+    using Type = int8_t;
+};
+
 template<>
 struct ResolveTypeImpl<DataType::QuantisedSymm16>
 {
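Note: the QSymmS8QuantizationScheme added above fixes the zero point at 0 and derives its scale from the largest absolute value of the observed range, so a range such as [-15.0, 15.0] maps onto the signed 8-bit interval [-127, 127]. A minimal standalone sketch of the same computation (hypothetical helper name, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <utility>

    // Scale/offset for symmetric signed 8-bit quantization: the offset is
    // always 0, and the scale covers the largest magnitude in [min, max]
    // with 127 steps.
    std::pair<float, int> ComputeQSymmS8Scheme(double min, double max)
    {
        double highest = (1 << 7) - 1; // 127; the eighth bit is the sign bit
        double extent  = std::max(std::abs(min), std::abs(max));
        return std::make_pair(static_cast<float>(extent / highest), 0);
    }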
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 3f57ce83b2..37b3bfa852 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -30,6 +30,7 @@ using MinMaxRanges = std::vector<MinMaxRange>;
 using MinMaxRangeMap = std::unordered_map<LayerGuid, MinMaxRanges>;

 const float g_Asymm8QuantizationBase = 255.0f;
+const float g_Symm8QuantizationBase  = 127.0f;
 const float g_Symm16QuantizationBase = 32767.0f;
 const float g_TestTolerance = 0.000001f;
@@ -57,7 +58,9 @@ public:
         const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
         BOOST_TEST(m_InputShape == info.GetShape());
         // Based off current default [-15.0f, 15.0f]
-        TestQuantizationParams(info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {30.0f / g_Asymm8QuantizationBase, 128},
+                                     {15.0f / g_Symm8QuantizationBase,  0},
+                                     {15.0f / g_Symm16QuantizationBase, 0});
     }

     void VisitOutputLayer(const IConnectableLayer* layer,
@@ -71,6 +74,7 @@ public:
 protected:
     void TestQuantizationParams(const TensorInfo& info,
                                 const OffsetScalePair& qAsymm8Params,
+                                const OffsetScalePair& qSymm8Params,
                                 const OffsetScalePair& qSymm16Params)
     {
         switch (m_QuantizerOptions.m_ActivationFormat)
@@ -79,6 +83,10 @@ protected:
                 TestQuantizationParamsImpl(
                     info, DataType::QuantisedAsymm8, qAsymm8Params.first, qAsymm8Params.second);
                 break;
+            case DataType::QSymmS8:
+                TestQuantizationParamsImpl(
+                    info, DataType::QSymmS8, qSymm8Params.first, qSymm8Params.second);
+                break;
             case DataType::QuantisedSymm16:
                 TestQuantizationParamsImpl(
                     info, DataType::QuantisedSymm16, qSymm16Params.first, qSymm16Params.second);
@@ -102,6 +110,7 @@ protected:

     void TestBiasQuantizationParams(const TensorInfo& info,
                                     const OffsetScalePair& qAsymm8Params,
+                                    const OffsetScalePair& qSymm8Params,
                                     const OffsetScalePair& qSymm16Params,
                                     DataType dataType = DataType::QuantisedAsymm8)
     {
@@ -110,6 +119,9 @@ protected:
             case DataType::QuantisedAsymm8:
                 TestQuantizationParamsImpl(info, dataType, qAsymm8Params.first, qAsymm8Params.second);
                 break;
+            case DataType::QSymmS8:
+                TestQuantizationParamsImpl(info, dataType, qSymm8Params.first, qSymm8Params.second);
+                break;
             case DataType::QuantisedSymm16:
                 TestQuantizationParamsImpl(info, dataType, qSymm16Params.first, qSymm16Params.second);
                 break;
@@ -124,11 +136,12 @@
     {
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
         float inputScaleQAsymm8 = 30.0f / g_Asymm8QuantizationBase;
+        float inputScaleQSymm8  = 15.0f / g_Symm8QuantizationBase;
         float inputScaleQSymm16 = 15.0f / g_Symm16QuantizationBase;
         float weightsScale = 3.0f / g_Asymm8QuantizationBase;

         // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(info, {inputScaleQAsymm8, 128}, {inputScaleQSymm16, 0});
+        TestQuantizationParams(info, {inputScaleQAsymm8, 128}, {inputScaleQSymm8, 0}, {inputScaleQSymm16, 0});

         TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85});

         if (biases.has_value())
         {
             TestBiasQuantizationParams(biases.value().GetInfo(),
                                        {inputScaleQAsymm8 * weightsScale, 0},
+                                       {inputScaleQSymm8 * weightsScale, 0},
                                        {inputScaleQSymm16 * weightsScale, 0},
                                        DataType::Signed32);
         }
@@ -180,7 +194,9 @@ public:
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off default static range [-20.0f, 20.0f]
-        TestQuantizationParams(info, {40.0f / g_Asymm8QuantizationBase, 128}, {20.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {40.0f / g_Asymm8QuantizationBase, 128},
+                                     {20.0f / g_Symm8QuantizationBase, 0},
+                                     {20.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -211,9 +227,14 @@ BOOST_AUTO_TEST_CASE(QuantizeAddition)
     TestAdditionQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestAdditionQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestAdditionQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestAdditionQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -235,7 +256,9 @@ public:
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off default static range [0.0f, 15.0f]
-        TestQuantizationParams(info, {15.0f / g_Asymm8QuantizationBase, 0}, {15.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {15.0f / g_Asymm8QuantizationBase, 0},
+                                     {15.0f / g_Symm8QuantizationBase, 0},
+                                     {15.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -363,9 +386,14 @@ BOOST_AUTO_TEST_CASE(QuantizeAbsActivation)
     TestActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -383,9 +411,14 @@ BOOST_AUTO_TEST_CASE(QuantizeLinearActivation)
     TestActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -403,9 +436,14 @@ BOOST_AUTO_TEST_CASE(QuantizeReLuActivation)
     TestActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -423,9 +461,14 @@ BOOST_AUTO_TEST_CASE(QuantizeSoftReLuActivation)
     TestActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -449,7 +492,9 @@ BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off default static range [0.0f, 3.5f]
-        TestQuantizationParams(info, {3.5f / g_Asymm8QuantizationBase, 0}, {3.5f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {3.5f / g_Asymm8QuantizationBase, 0},
+                                     {3.5f / g_Symm8QuantizationBase, 0},
+                                     {3.5f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -465,9 +510,14 @@ BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation)
     TestBoundedReluActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestBoundedReluActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestBoundedReluActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestBoundedReluActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -492,7 +542,9 @@ BOOST_AUTO_TEST_CASE(QuantizeTanHActivation)

         // Based off default static range [-1.0f, 1.0f]
         TestQuantizationParams(
-            info, {2.0f / g_Asymm8QuantizationBase, 128}, {1.0f / g_Symm16QuantizationBase, 0});
+            info, {2.0f / g_Asymm8QuantizationBase, 128},
+                  {1.0f / g_Symm8QuantizationBase, 0},
+                  {1.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -508,9 +560,14 @@ BOOST_AUTO_TEST_CASE(QuantizeTanHActivation)
     TestTanHActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestTanHActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestTanHActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestTanHActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -532,7 +589,9 @@ public:
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off default static range [-5.0f, 15.0f]
-        TestQuantizationParams(info, {20.0f / g_Asymm8QuantizationBase, 64}, {15.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {20.0f / g_Asymm8QuantizationBase, 64},
+                                     {15.0f / g_Symm8QuantizationBase, 0},
+                                     {15.0f / g_Symm16QuantizationBase, 0});
     }

 protected:
@@ -541,7 +600,9 @@ protected:
     void CheckForwardedQuantizationSettings(const IConnectableLayer* layer)
     {
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-        TestQuantizationParams(info, {20.0f / g_Asymm8QuantizationBase, 64}, {15.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {20.0f / g_Asymm8QuantizationBase, 64},
+                                     {15.0f / g_Symm8QuantizationBase, 0},
+                                     {15.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -559,9 +620,14 @@ BOOST_AUTO_TEST_CASE(QuantizeLeakyReLuActivation)
     TestLeakyReLuActivationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestLeakyReLuActivationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestLeakyReLuActivationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestLeakyReLuActivationQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -590,7 +656,9 @@ BOOST_AUTO_TEST_CASE(QuantizeBatchNorm)

         // Based off default static range [-15.0f, 15.0f]
         TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
+            info, {30.0f / g_Asymm8QuantizationBase, 128},
+                  {15.0f / g_Symm8QuantizationBase, 0},
+                  {15.0f / g_Symm16QuantizationBase, 0});

         // Test constants
         TestConstantQuantizationParams(mean.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
@@ -634,9 +702,14 @@ BOOST_AUTO_TEST_CASE(QuantizeBatchNorm)
     TestBatchNormalizationQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestBatchNormalizationQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestBatchNormalizationQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions QQsymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), QQsymm16Options)->ExportNetwork();
+    TestBatchNormalizationQuantization validatorQSymm16(QQsymm16Options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -660,9 +733,10 @@ BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace)
         const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();

         const OffsetScalePair qAsymm8Params{ 30.0f / g_Asymm8QuantizationBase, 128 };
+        const OffsetScalePair qSymm8Params { 15.0f / g_Symm8QuantizationBase,  0 };
         const OffsetScalePair qSymm16Params{ 15.0f / g_Symm16QuantizationBase, 0 };

-        TestQuantizationParams(info, qAsymm8Params, qSymm16Params);
+        TestQuantizationParams(info, qAsymm8Params, qSymm8Params, qSymm16Params);
     }
 };
@@ -690,10 +764,16 @@ BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace)
     TestDepthToSpaceQuantization validatorQAsymm8(inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    // test QSymm8 quantization
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestDepthToSpaceQuantization validatorQSymm8(qSymm8Options, inputShape, outputShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestDepthToSpaceQuantization validatorQSymm16(options, inputShape, outputShape);
+    const QuantizerOptions Qsymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork();
+    TestDepthToSpaceQuantization validatorQSymm16(Qsymm16Options, inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -845,9 +925,14 @@ void ValidateFullyConnectedLayer(const bool biasEnabled)
     TestFullyConnectedQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestFullyConnectedQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestFullyConnectedQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions Qsymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork();
+    TestFullyConnectedQuantization validatorQSymm16(Qsymm16Options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -920,9 +1005,14 @@ void TestQuantizeConvolution2d(bool useBiases)
     TestConv2dQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestConv2dQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestConv2dQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions Qsymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork();
+    TestConv2dQuantization validatorQSymm16(Qsymm16Options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -995,9 +1085,14 @@ void TestQuantizeDepthwiseConvolution2d(bool useBiases)
     TestDepthwiseConv2dQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestDepthwiseConv2dQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestDepthwiseConv2dQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions Qsymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork();
+    TestDepthwiseConv2dQuantization validatorQSymm16(Qsymm16Options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1031,9 +1126,10 @@ BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization)
         const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();

         const OffsetScalePair qAsymm8Params{ 30.0f / g_Asymm8QuantizationBase, 128 };
+        const OffsetScalePair qSymm8Params { 15.0f / g_Symm8QuantizationBase,  0 };
         const OffsetScalePair qSymm16Params{ 15.0f / g_Symm16QuantizationBase, 0 };

-        TestQuantizationParams(info, qAsymm8Params, qSymm16Params);
+        TestQuantizationParams(info, qAsymm8Params, qSymm8Params, qSymm16Params);
     }
 };
@@ -1057,10 +1153,16 @@ BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization)
     TestInstanceNormalizationQuantization validatorQAsymm8(tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    // test QSymm8 quantization
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestInstanceNormalizationQuantization validatorQSymm8(qSymm8Options, tensorShape, tensorShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestInstanceNormalizationQuantization validatorQSymm16(options, tensorShape, tensorShape);
+    const QuantizerOptions qSymm16Options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16Options)->ExportNetwork();
+    TestInstanceNormalizationQuantization validatorQSymm16(qSymm16Options, tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1084,9 +1186,10 @@ BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         const OffsetScalePair qAsymm8Params{ 30.0f / g_Asymm8QuantizationBase, 128 };
+        const OffsetScalePair qSymm8Params { 15.0f / g_Symm8QuantizationBase,  0 };
         const OffsetScalePair qSymm16Params{ 15.0f / g_Symm16QuantizationBase, 0 };

-        TestQuantizationParams(info, qAsymm8Params, qSymm16Params);
+        TestQuantizationParams(info, qAsymm8Params, qSymm8Params, qSymm16Params);
     }
 };
@@ -1113,10 +1216,16 @@ BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax)
     TestLogSoftmaxQuantization validatorQAsymm8(tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    // test QSymm8 quantization
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestLogSoftmaxQuantization validatorQSymm8(qSymm8Options, tensorShape, tensorShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QuantisedSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestLogSoftmaxQuantization validatorQSymm16(options, tensorShape, tensorShape);
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestLogSoftmaxQuantization validatorQSymm16(qSymm16options, tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1161,7 +1270,9 @@ BOOST_AUTO_TEST_CASE(QuantizeSoftmax)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off default static range [0.0f, 1.0f]
-        TestQuantizationParams(info, {1.0f / g_Asymm8QuantizationBase, 0}, {1.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {1.0f / g_Asymm8QuantizationBase, 0},
+                                     {1.0f / g_Symm8QuantizationBase, 0},
+                                     {1.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -1175,9 +1286,15 @@ BOOST_AUTO_TEST_CASE(QuantizeSoftmax)
     TestSoftmaxQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestSoftmaxQuantization validatorQSymm16(options, shape, shape);
+    // test QSymm8 quantization
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestSoftmaxQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestSoftmaxQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1207,8 +1324,13 @@ BOOST_AUTO_TEST_CASE(QuantizeStandIn)
                       armnn::UnimplementedException);

     // test QuantisedSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), options)->ExportNetwork(),
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork(),
+                      armnn::UnimplementedException);
+
+    // test QuantisedSymm16 quantization
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork(),
                       armnn::UnimplementedException);
 }
@@ -1287,9 +1409,14 @@ BOOST_AUTO_TEST_CASE(QuantizePermute)
     TestPermuteQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestPermuteQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestPermuteQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestPermuteQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1331,9 +1458,14 @@ BOOST_AUTO_TEST_CASE(QuantizeSpaceToBatch)
     TestSpaceToBatchQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestSpaceToBatchQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestSpaceToBatchQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestSpaceToBatchQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1359,6 +1491,7 @@ BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
         TestQuantizationParams(info,
                                { 30.0f / g_Asymm8QuantizationBase, 128 },
+                               { 15.0f / g_Symm8QuantizationBase,  0 },
                                { 15.0f / g_Symm16QuantizationBase, 0 });
     }
 };
@@ -1377,9 +1510,14 @@ BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth)
     TestSpaceToDepthQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestSpaceToDepthQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestSpaceToDepthQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestSpaceToDepthQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1435,9 +1573,14 @@ BOOST_AUTO_TEST_CASE(QuantizePooling2d)
     TestPooling2dQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestPooling2dQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestPooling2dQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestPooling2dQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1461,7 +1604,9 @@ BOOST_AUTO_TEST_CASE(QuantizeConstant)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         // Based off the range of values in the const tensor used for the test: [-2.0f, 6.0f]
-        TestQuantizationParams(info, {8.0f / g_Asymm8QuantizationBase, 64}, {6.0f / g_Symm16QuantizationBase, 0});
+        TestQuantizationParams(info, {8.0f / g_Asymm8QuantizationBase, 64},
+                                     {6.0f / g_Symm8QuantizationBase, 0},
+                                     {6.0f / g_Symm16QuantizationBase, 0});
     }
 };
@@ -1493,9 +1638,14 @@ BOOST_AUTO_TEST_CASE(QuantizeConstant)
     TestConstantQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestConstantQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestConstantQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestConstantQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1521,6 +1671,7 @@ BOOST_AUTO_TEST_CASE(QuantizeAbs)
         TestQuantizationParams(outputInfo,
                                { 30.0f / g_Asymm8QuantizationBase, 128 },
+                               { 15.0f / g_Symm8QuantizationBase,  0 },
                                { 15.0f / g_Symm16QuantizationBase, 0 });
     }
 };
@@ -1541,9 +1692,14 @@ BOOST_AUTO_TEST_CASE(QuantizeAbs)
     TestAbsQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestAbsQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestAbsQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestAbsQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1578,6 +1734,7 @@ BOOST_AUTO_TEST_CASE(QuantizeArgMinMax)
         TestQuantizationParams(outputInfo,
                                { 30.0f / g_Asymm8QuantizationBase, 128 },
+                               { 15.0f / g_Symm8QuantizationBase,  0 },
                                { 15.0f / g_Symm16QuantizationBase, 0 });
     }
 };
@@ -1613,9 +1770,14 @@ BOOST_AUTO_TEST_CASE(QuantizeArgMinMax)
     TestArgMinMaxQuantization validatorQAsymm8(inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestArgMinMaxQuantization validatorQSymm16(options, inputShape, outputShape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestArgMinMaxQuantization validatorQSymm8(qSymm8Options, inputShape, outputShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestArgMinMaxQuantization validatorQSymm16(qSymm16options, inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1639,9 +1801,10 @@ BOOST_AUTO_TEST_CASE(QuantizeComparison)
         TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();

         const OffsetScalePair qAsymm8Params{ 30.0f / g_Asymm8QuantizationBase, 128 };
+        const OffsetScalePair qSymm8Params { 15.0f / g_Symm8QuantizationBase,  0 };
         const OffsetScalePair qSymm16Params{ 15.0f / g_Symm16QuantizationBase, 0 };

-        TestQuantizationParams(info, qAsymm8Params, qSymm16Params);
+        TestQuantizationParams(info, qAsymm8Params, qSymm8Params, qSymm16Params);
     }
 };
@@ -1669,10 +1832,15 @@ BOOST_AUTO_TEST_CASE(QuantizeComparison)
     TestComparisonQuantization validatorQAsymm8(tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestComparisonQuantization validatorQSymm8(qSymm8Options, tensorShape, tensorShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QuantisedSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestComparisonQuantization validatorQSymm16(options, tensorShape, tensorShape);
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestComparisonQuantization validatorQSymm16(qSymm16options, tensorShape, tensorShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1704,7 +1872,9 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat)
         TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo();

         TestQuantizationParams(
-            outputInfo, {60.8f / g_Asymm8QuantizationBase, 65}, {45.3f / g_Symm16QuantizationBase, 0});
+            outputInfo, {60.8f / g_Asymm8QuantizationBase, 65},
+                        {45.3f / g_Symm8QuantizationBase, 0},
+                        {45.3f / g_Symm16QuantizationBase, 0});

         TensorInfo inputInfo0 = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
         TensorInfo inputInfo1 = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
@@ -1743,9 +1913,11 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat)
     input2->GetOutputSlot(0).SetTensorInfo(info);
     concatLayer->GetOutputSlot(0).SetTensorInfo(info);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
     INetworkQuantizerPtr quantizerPtrQAsymm8 = INetworkQuantizer::Create(network.get());
-    INetworkQuantizerPtr quantizerPtrQSymm16 = INetworkQuantizer::Create(network.get(), options);
+    INetworkQuantizerPtr quantizerPtrQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options);
+    INetworkQuantizerPtr quantizerPtrQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options);
     // Override the input ranges
     float min = -15.5f;
     float max =  45.3f;
@@ -1754,6 +1926,10 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat)
     quantizerPtrQAsymm8->OverrideInputRange(1, (min + 6.7f), max);
     quantizerPtrQAsymm8->OverrideInputRange(2, min, (max - 7.8f));

+    quantizerPtrQSymm8->OverrideInputRange(0, (min + 2.1f), (max - 3.2f));
+    quantizerPtrQSymm8->OverrideInputRange(1, (min + 6.7f), max);
+    quantizerPtrQSymm8->OverrideInputRange(2, min, (max - 7.8f));
+
     quantizerPtrQSymm16->OverrideInputRange(0, (min + 2.1f), (max - 3.2f));
     quantizerPtrQSymm16->OverrideInputRange(1, (min + 6.7f), max);
     quantizerPtrQSymm16->OverrideInputRange(2, min, (max - 7.8f));
@@ -1762,8 +1938,12 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat)
     TestConcatQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    INetworkPtr quantizedNetworkQSymm8 = quantizerPtrQSymm8->ExportNetwork();
+    TestConcatQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     INetworkPtr quantizedNetworkQSymm16 = quantizerPtrQSymm16->ExportNetwork();
-    TestConcatQuantization validatorQSymm16(options, shape, shape);
+    TestConcatQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1805,9 +1985,14 @@ BOOST_AUTO_TEST_CASE(QuantizeReshape)
     TestReshapeQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestReshapeQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestReshapeQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestReshapeQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1848,9 +2033,14 @@ BOOST_AUTO_TEST_CASE(QuantizeSplitter)
     TestSplitterQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestSplitterQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestSplitterQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestSplitterQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1896,9 +2086,14 @@ BOOST_AUTO_TEST_CASE(QuantizeResize)
     TestResizeQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestResizeQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestResizeQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestResizeQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1940,9 +2135,14 @@ BOOST_AUTO_TEST_CASE(QuantizeStridedSlice)
     TestStridedSliceQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestStridedSliceQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestStridedSliceQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestStridedSliceQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -1984,9 +2184,14 @@ BOOST_AUTO_TEST_CASE(QuantizeBatchToSpace)
     TestBatchToSpaceQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestBatchToSpaceQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestBatchToSpaceQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestBatchToSpaceQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -2031,6 +2236,7 @@ BOOST_AUTO_TEST_CASE(QuantizePrelu)
         // Based off current default [-15.0f, 15.0f]
         TestQuantizationParams(info,
                                { 30.0f / g_Asymm8QuantizationBase, 128 }, // QASymm8
+                               { 15.0f / g_Symm8QuantizationBase,  0 },   // QSymm8
                                { 15.0f / g_Symm16QuantizationBase, 0 });  // QSymm16
     }
@@ -2048,6 +2254,7 @@ BOOST_AUTO_TEST_CASE(QuantizePrelu)
         const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
         TestQuantizationParams(info,
                                { 30.0f / g_Asymm8QuantizationBase, 128 }, // QASymm8
+                               { 15.0f / g_Symm8QuantizationBase,  0 },   // QSymm8
                                { 15.0f / g_Symm16QuantizationBase, 0 });  // QSymm16
     }
@@ -2088,9 +2295,14 @@ BOOST_AUTO_TEST_CASE(QuantizePrelu)
     TestPreluQuantization validatorQAsymm8(inputShape, alphaShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestPreluQuantization validatorQSymm16(options, inputShape, alphaShape, outputShape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestPreluQuantization validatorQSymm8(qSymm8Options, inputShape, alphaShape, outputShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestPreluQuantization validatorQSymm16(qSymm16options, inputShape, alphaShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -2154,10 +2366,16 @@ void TestQuantizeTransposeConvolution2d(bool useBiases)
     TestTransposeConvolution2dQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    // test QSymm8 quantization
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestTransposeConvolution2dQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestTransposeConvolution2dQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestTransposeConvolution2dQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -2202,6 +2420,7 @@ BOOST_AUTO_TEST_CASE(QuantizeStack)

         TestQuantizationParams(outputInfo,
                                { 30.0f / g_Asymm8QuantizationBase, 128 },
+                               { 15.0f / g_Symm8QuantizationBase,  0 },
                                { 15.0f / g_Symm16QuantizationBase, 0 });
     }
 };
@@ -2227,9 +2446,14 @@ BOOST_AUTO_TEST_CASE(QuantizeStack)
     TestStackQuantization validatorQAsymm8(inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestStackQuantization validatorQSymm16(options, inputShape, outputShape);
+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestStackQuantization validatorQSymm8(qSymm8Options, inputShape, inputShape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestStackQuantization validatorQSymm16(qSymm16options, inputShape, outputShape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
@@ -2255,9 +2479,10 @@ BOOST_AUTO_TEST_CASE(QuantizeSlice)
         const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();

        const OffsetScalePair qAsymm8Params{ 30.0f / g_Asymm8QuantizationBase, 128 };
+        const OffsetScalePair qSymm8Params { 15.0f / g_Symm8QuantizationBase,  0 };
         const OffsetScalePair qSymm16Params{ 15.0f / g_Symm16QuantizationBase, 0 };

-        TestQuantizationParams(info, qAsymm8Params, qSymm16Params);
+        TestQuantizationParams(info, qAsymm8Params, qSymm8Params, qSymm16Params);
     }
 };
@@ -2281,10 +2506,15 @@ BOOST_AUTO_TEST_CASE(QuantizeSlice)
     TestSliceQuantization validatorQAsymm8(shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);

+    const QuantizerOptions qSymm8Options(DataType::QSymmS8);
+    INetworkPtr quantizedNetworkQSymm8 = INetworkQuantizer::Create(network.get(), qSymm8Options)->ExportNetwork();
+    TestSliceQuantization validatorQSymm8(qSymm8Options, shape, shape);
+    VisitLayersTopologically(quantizedNetworkQSymm8.get(), validatorQSymm8);
+
     // test QSymm16 quantization
-    const QuantizerOptions options(DataType::QuantisedSymm16);
-    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestSliceQuantization validatorQSymm16(options, shape, shape);
+    const QuantizerOptions qSymm16options(DataType::QuantisedSymm16);
+    INetworkPtr quantizedNetworkQSymm16 = INetworkQuantizer::Create(network.get(), qSymm16options)->ExportNetwork();
+    TestSliceQuantization validatorQSymm16(qSymm16options, shape, shape);
     VisitLayersTopologically(quantizedNetworkQSymm16.get(), validatorQSymm16);
 }
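Note: the validators above all encode the same expectation. For the default static range [-15.0f, 15.0f] used by most layers, the three target formats work out as follows (worked example only; the bases are the constants defined at the top of QuantizerTest.cpp):

    const float qAsymm8Scale = 30.0f / 255.0f;   // ~0.11765, offset 128
    const float qSymmS8Scale = 15.0f / 127.0f;   // ~0.11811, offset 0
    const float qSymm16Scale = 15.0f / 32767.0f; // ~0.00045778, offset 0

The last value is the magic constant (0.000457777642) guarded against in QSymm16QuantizationScheme::ComputeScheme earlier in this patch; since both branches of that guard return the same pair, the check is effectively a no-op.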
@@ -2394,12 +2624,14 @@ void PreserveTypeTestImpl(const DataType& dataType)
     input1->GetOutputSlot(0).SetTensorInfo(info);
     addition->GetOutputSlot(0).SetTensorInfo(info);

-    const QuantizerOptions options(DataType::QuantisedAsymm8, true);
+    QuantizerOptions options = dataType == DataType::Float32 ?
+        QuantizerOptions(DataType::QuantisedAsymm8, true) : QuantizerOptions(dataType, true);
+
     INetworkPtr quantizedNetworkQAsymm8 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
     TestPreserveType validatorQAsymm8(options, dataType, shape, shape);
     VisitLayersTopologically(quantizedNetworkQAsymm8.get(), validatorQAsymm8);
     validatorQAsymm8.CheckQuantizeDequantizeLayerVisited(
-        dataType == DataType::Float32 || dataType == DataType::Float16);
+                dataType == DataType::Float32 || dataType == DataType::Float16);
 }

 BOOST_AUTO_TEST_CASE(PreserveTypeFloat32)
 {
     PreserveTypeTestImpl(DataType::Float32);
 }
@@ -2412,5 +2644,15 @@ BOOST_AUTO_TEST_CASE(PreserveTypeQAsymm8)
     PreserveTypeTestImpl(DataType::QuantisedAsymm8);
 }

+BOOST_AUTO_TEST_CASE(PreserveTypeQsymm8)
+{
+    PreserveTypeTestImpl(DataType::QSymmS8);
+}
+
+BOOST_AUTO_TEST_CASE(PreserveTypeQsymm16)
+{
+    PreserveTypeTestImpl(DataType::QuantisedSymm16);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 } // namespace armnn
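Note: the test cases above all drive the quantizer through the same public API. A usage sketch for the new data type (assumes an already-built float32 INetwork named "network"):

    // Quantize a network to signed-symmetric 8 bit and export it.
    armnn::QuantizerOptions qSymmS8Options(armnn::DataType::QSymmS8);
    armnn::INetworkQuantizerPtr quantizer =
        armnn::INetworkQuantizer::Create(network.get(), qSymmS8Options);
    armnn::INetworkPtr quantizedNetwork = quantizer->ExportNetwork();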
     return DequantizeOffsetTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
 }
 
+LayerTestResult<float, 4> DequantizeSimpleInt8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return DequantizeSimpleTest<armnn::DataType::QSymmS8>(workloadFactory, memoryManager);
+}
+
 LayerTestResult<float, 4> DequantizeSimpleInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
@@ -158,6 +165,13 @@ LayerTestResult<armnn::Half, 4> DequantizeSimpleUint8ToFp16Test(
                                          memoryManager);
 }
 
+LayerTestResult<armnn::Half, 4> DequantizeSimpleInt8ToFp16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return DequantizeSimpleTest<armnn::DataType::QSymmS8, armnn::DataType::Float16>(workloadFactory, memoryManager);
+}
+
 LayerTestResult<armnn::Half, 4> DequantizeSimpleInt16ToFp16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
diff --git a/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp
index 008fea8833..c70f03e8f3 100644
--- a/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp
@@ -20,6 +20,10 @@ LayerTestResult<float, 4> DequantizeOffsetUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<float, 4> DequantizeSimpleInt8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> DequantizeSimpleInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -28,6 +32,10 @@ LayerTestResult<armnn::Half, 4> DequantizeSimpleUint8ToFp16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<armnn::Half, 4> DequantizeSimpleInt8ToFp16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<armnn::Half, 4> DequantizeSimpleInt16ToFp16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp
index 94d7224629..481f6813b6 100644
--- a/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp
@@ -139,6 +139,13 @@ LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
     return QuantizeClampTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
 }
 
+LayerTestResult<int8_t, 4> QuantizeClampInt8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return QuantizeClampTest<armnn::DataType::QSymmS8>(workloadFactory, memoryManager);
+}
+
 LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
diff --git a/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp
index a2e1a49763..ece75fd43b 100644
--- a/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp
@@ -18,6 +18,10 @@ LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<int8_t, 4> QuantizeClampInt8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 299503ddc6..19b76152f3 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -615,8 +615,9 @@ bool RefLayerSupport::IsDequantizeSupported(const TensorInfo& input,
 {
     bool supported = true;
 
-    std::array<DataType,2> supportedInputTypes = {
+    std::array<DataType,3> supportedInputTypes = {
         DataType::QuantisedAsymm8,
+        DataType::QSymmS8,
         DataType::QuantisedSymm16
     };
 
@@ -1398,7 +1399,7 @@ bool RefLayerSupport::IsQuantizeSupported(const TensorInfo& input,
 {
     bool supported = true;
 
-    // Define supported output types.
+    // Define supported input types.
     std::array<DataType,1> supportedInputTypes = {
         DataType::Float32,
     };
@@ -1407,8 +1408,9 @@ bool RefLayerSupport::IsQuantizeSupported(const TensorInfo& input,
                                   "Reference quantize: input type not supported.");
 
     // Define supported output types.
-    std::array<DataType,2> supportedOutputTypes = {
+    std::array<DataType,3> supportedOutputTypes = {
         DataType::QuantisedAsymm8,
+        DataType::QSymmS8,
         DataType::QuantisedSymm16
     };
     supported &= CheckSupportRule(TypeAnyOf(output, supportedOutputTypes), reasonIfUnsupported,
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index a397e935c1..b88f432acf 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1419,13 +1419,16 @@ BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsInt16)
 // Dequantize
 ARMNN_AUTO_TEST_CASE(DequantizeSimpleUint8, DequantizeSimpleUint8Test)
 ARMNN_AUTO_TEST_CASE(DequantizeOffsetUint8, DequantizeOffsetUint8Test)
+ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt8, DequantizeSimpleInt8Test)
 ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt16, DequantizeSimpleInt16Test)
 ARMNN_AUTO_TEST_CASE(DequantizeSimpleUint8ToFp16, DequantizeSimpleUint8ToFp16Test)
+ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt8ToFp16, DequantizeSimpleInt8ToFp16Test)
 ARMNN_AUTO_TEST_CASE(DequantizeSimpleInt16ToFp16, DequantizeSimpleInt16ToFp16Test)
 
 // Quantize
 ARMNN_AUTO_TEST_CASE(QuantizeSimpleUint8, QuantizeSimpleUint8Test)
 ARMNN_AUTO_TEST_CASE(QuantizeClampUint8, QuantizeClampUint8Test)
+ARMNN_AUTO_TEST_CASE(QuantizeClampInt8, QuantizeClampInt8Test)
 ARMNN_AUTO_TEST_CASE(QuantizeClampInt16, QuantizeClampInt16Test)
 
 // PReLU
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp
index ca5110c2fd..ca6d3cbc60 100644
--- a/src/backends/reference/workloads/BaseIterator.hpp
+++ b/src/backends/reference/workloads/BaseIterator.hpp
@@ -137,6 +137,25 @@ private:
     const int32_t m_Offset;
 };
 
+class QSymmS8Decoder : public TypedIterator<const int8_t, Decoder<float>>
+{
+public:
+    QSymmS8Decoder(const int8_t* data, const float scale, const int32_t offset)
+        : TypedIterator(data), m_Scale(scale), m_Offset(offset) {}
+
+    QSymmS8Decoder(const float scale, const int32_t offset)
+        : QSymmS8Decoder(nullptr, scale, offset) {}
+
+    float Get() const override
+    {
+        return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
+    }
+
+private:
+    const float m_Scale;
+    const int32_t m_Offset;
+};
+
 class QSymm16Decoder : public TypedIterator<const int16_t, Decoder<float>>
 {
 public:
@@ -245,6 +264,30 @@ private:
     const int32_t m_Offset;
 };
 
+class QSymmS8Encoder : public TypedIterator<int8_t, Encoder<float>>
+{
+public:
+    QSymmS8Encoder(int8_t* data, const float scale, const int32_t offset)
+        : TypedIterator(data), m_Scale(scale), m_Offset(offset) {}
+
+    QSymmS8Encoder(const float scale, const int32_t offset)
+        : QSymmS8Encoder(nullptr, scale, offset) {}
+
+    void Set(float right) override
+    {
+        *m_Iterator = armnn::Quantize<int8_t>(right, m_Scale, m_Offset);
+    }
+
+    float Get() const override
+    {
+        return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
+    }
+
+private:
+    const float m_Scale;
+    const int32_t m_Offset;
+};
+
 class QSymm16Encoder : public TypedIterator<int16_t, Encoder<float>>
 {
 public:
diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp
index b9cd7f9573..9d41c9e9e7 100644
--- a/src/backends/reference/workloads/Decoders.hpp
+++ b/src/backends/reference/workloads/Decoders.hpp
@@ -105,6 +105,13 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
         {
             return MakeSigned32Decoder(info, data);
         }
+        case DataType::QSymmS8:
+        {
+            return std::make_unique<QSymmS8Decoder>(
+                static_cast<const int8_t*>(data),
+                info.GetQuantizationScale(),
+                info.GetQuantizationOffset());
+        }
         default:
         {
             BOOST_ASSERT_MSG(false, "Unsupported Data Type!");
diff --git a/src/backends/reference/workloads/Encoders.hpp b/src/backends/reference/workloads/Encoders.hpp
index 0d578d68de..92493ed641 100644
--- a/src/backends/reference/workloads/Encoders.hpp
+++ b/src/backends/reference/workloads/Encoders.hpp
@@ -37,6 +37,13 @@ inline std::unique_ptr<Encoder<float>> MakeEncoder(const TensorInfo& info, void*
                 info.GetQuantizationScale(),
                 info.GetQuantizationOffset());
         }
+        case DataType::QSymmS8:
+        {
+            return std::make_unique<QSymmS8Encoder>(
+                static_cast<int8_t*>(data),
+                info.GetQuantizationScale(),
+                info.GetQuantizationOffset());
+        }
         case armnn::DataType::QuantisedSymm16:
         {
             return std::make_unique<QSymm16Encoder>(
diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.cpp b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
index b7ace32e14..a78804b709 100644
--- a/src/backends/reference/workloads/RefQuantizeWorkload.cpp
+++ b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
@@ -48,6 +48,11 @@ void RefQuantizeWorkload::Execute() const
             QuantizeImpl<uint8_t>(input, output, m_NumElements, m_Scale, m_Offset);
             break;
         }
+        case DataType::QSymmS8:
+        {
+            QuantizeImpl<int8_t>(input, output, m_NumElements, m_Scale, m_Offset);
+            break;
+        }
         case DataType::QuantisedSymm16:
         {
             QuantizeImpl<int16_t>(input, output, m_NumElements, m_Scale, 0);
-- 
cgit v1.2.1