author     Jonathan Deakin <jonathan.deakin@arm.com>   2024-01-24 09:15:38 +0000
committer  Radu Salavat <radu.salavat@arm.com>         2024-04-15 13:52:31 +0000
commit     a668f9f8a4eab405df0fe8dd58e7d9425bcf9640 (patch)
tree       db16e6af9289897557a58755b88d2c337dcb8650 /tests
parent     34bdffb288d6367cb6dca652ebed60c450854039 (diff)
download   ComputeLibrary-a668f9f8a4eab405df0fe8dd58e7d9425bcf9640.tar.gz
Add s8f32 kernels and dynamic QuantizationInfo
- Add support for QASYMM8_SIGNED*QASYMM8_SIGNED->F32 in CpuGemmLowpMatrixMultiplyCore.
- Add an s8f32 kernel using the existing s8->s32 kernels with a new DequantizeFloat OutputStage; the structure is similar to Requantize32 but works in the opposite direction.
- Add SME s8f32 kernels with integrated support for DequantizeFloat.
- Add scale to CpuGemmLowpOffsetContributionKernel.
- Add a virtual dequantize scale to gemm_common, implemented only for gemm_interleaved.
- Update year to 2024 in generate_build_files.
- Add a dynamic flag to QuantizationInfo which signals to operators that it can change after configuration.
- Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore.
- Add a dynamic quantization fixture by extending GEMMLowpGenericMatrixMultiplyCoreValidationFixture.
- Add GEMMLowpDequantizedMatrixMultiplyValidationFixture.
- Store k (the number of columns of A) rather than k_offset in the offset contribution kernels, so that it can be recomputed when the other offsets change.

Relates to: ONCPUML-1444, MLINFSW-439

Co-authored-by: Milos Puzovic <Milos.Puzovic@arm.com>
Co-authored-by: David Mansell <David.Mansell@arm.com>
Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da
Signed-off-by: Jonathan Deakin <jonathan.deakin@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
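For context, a minimal sketch of the dynamic-quantization flow this patch enables, mirroring what the new fixture does: configure the operator with placeholder QuantizationInfo marked dynamic (the new third constructor argument), then install the real scale/offset via set_quantization_info() before running. The shapes, scales and offsets below are illustrative placeholders, and the F32 destination assumes the QASYMM8_SIGNED*QASYMM8_SIGNED->F32 path added here.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_dynamic_gemmlowp()
{
    Tensor a, b, dst;
    // Mark the quantization info as dynamic (third argument) so the operator
    // knows the scale/offset may change after configure().
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f, 0, true)));
    b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f, 0, true)));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));

    NEGEMMLowpMatrixMultiplyCore gemm;
    gemm.configure(&a, &b, nullptr, &dst);

    // The real quantization parameters become known only later; because the
    // info is flagged dynamic they can be updated after configuration.
    a.info()->set_quantization_info(QuantizationInfo(0.25f, 3, true));
    b.info()->set_quantization_info(QuantizationInfo(0.5f, -1, true));

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill a and b with quantized data ...
    gemm.run();
}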
Diffstat (limited to 'tests')
-rw-r--r--   tests/validation/NEON/GEMMLowp.cpp                 34
-rw-r--r--   tests/validation/fixtures/GEMMLowpFixture.h       121
-rw-r--r--   tests/validation/reference/QuantizationLayer.cpp   12
3 files changed, 146 insertions, 21 deletions
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 514d156983..1231c21ddb 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -53,6 +53,7 @@ namespace
{
constexpr AbsoluteTolerance<float> tolerance_batched(1);
constexpr AbsoluteTolerance<float> tolerance_quant(1);
+ constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f);
} // namespace
@@ -62,6 +63,11 @@ TEST_SUITE(MatrixMultiplyCore)
using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;
+using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+
+using framework::dataset::make;
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
shape_a, shape_b, shape_c, a_offset, b_offset)
@@ -337,6 +343,34 @@ TEST_SUITE_END() // S32
TEST_SUITE_END() // ACCUMULATION
#endif // __aarch64__
+TEST_SUITE(DynamicQuantization)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // DynamicQuantization
+
+TEST_SUITE(Dequant)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_dequantized);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_dequantized);
+}
+TEST_SUITE_END() // Dequant
+
TEST_SUITE_END() // MatrixMultiplyCore
TEST_SUITE_END() // GEMMLowp
TEST_SUITE_END() // NEON
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 0f6908a468..11a491faa7 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -31,6 +31,7 @@
#include "tests/validation/Validation.h"
#include "tests/validation/reference/GEMMLowp.h"
#include "tests/validation/reference/ArithmeticOperations.h"
+#include "tests/validation/reference/QuantizationLayer.h"
#include <cstdint>
#include <vector>
@@ -57,11 +58,21 @@ void fill_quantized(U &&tensor, int i)
}
template <typename U>
-void fill_s32(U &&tensor, int i, int32_t min, int32_t max)
+void fill(U &&tensor, int i, int32_t min, int32_t max)
{
- ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32);
- std::uniform_int_distribution<int32_t> distribution(min, max);
- library->fill(tensor, distribution, i);
+ if (tensor.data_type() == DataType::S32) {
+ std::uniform_int_distribution<int32_t> distribution(min, max);
+ library->fill(tensor, distribution, i);
+ }
+ else if(tensor.data_type() == DataType::F32)
+ {
+ std::uniform_real_distribution<float> distribution((float)min, (float)max);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+ }
}
/** Information about how to fill tensors */
@@ -83,22 +94,25 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo
TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(),
- bool accumulate = false)
+ bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN)
{
ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
- // Create tensors
- const DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
+ // If unknown, set to sensible defaults
+ if (data_type_output == DataType::UNKNOWN) {
+ data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
+ }
- TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, a_qinfo);
- TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+ // Create tensors
+ TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo);
+ TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
TensorType bias;
if(is_fused)
{
TensorShape bias_shape(shape_b[0]);
- bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
+ bias = create_tensor<TensorType>(bias_shape,data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
}
// Create and configure function
@@ -109,6 +123,13 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
false /* pretranspose_B */, accumulate));
+ // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
+ if (dynamic_qinfo)
+ {
+ a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
+ b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
+ }
+
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
ARM_COMPUTE_ASSERT(output.info()->is_resizable());
@@ -131,7 +152,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
if (accumulate)
{
ARM_COMPUTE_ASSERT(accumulate != run_twice);
- fill_s32(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
}
if(is_fused)
@@ -139,7 +160,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
bias.allocator()->allocate();
ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
- fill_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
+ fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
}
// Run with variable inputs.
@@ -150,7 +171,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
fill_quantized(AccessorType(b), 4 + finfo.hash);
if(is_fused)
{
- fill_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
+ fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
}
}
@@ -225,21 +246,20 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo
class GEMMLowpGenericMatrixMultiplyCoreValidationFixture : public framework::Fixture
{
public:
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate=false)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate=false, bool dynamic_qinfo = false)
{
const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset);
TensorFillInfo finfo;
- _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
+ _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
_reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
}
protected:
- TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate)
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
{
const auto output_qinfo = QuantizationInfo(); // No output stage
- return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo,
- DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate);
+ return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo);
}
SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate)
@@ -250,7 +270,7 @@ protected:
if (accumulate)
{
SimpleTensor<int32_t> output{ shape_output, DataType::S32, 1 };
- fill_s32(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
reference::arithmetic_operation<int32_t>(reference::ArithmeticOperation::ADD, output, ref_output, output, ConvertPolicy::SATURATE);
return output;
}
@@ -282,6 +302,16 @@ public:
}
};
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+ {
+ GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */, true /* dynamic_qinfo */);
+ }
+};
+
template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
class GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
{
@@ -417,7 +447,7 @@ protected:
TensorShape bias_shape(shape_b[0]);
SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
- (run_twice) ? fill_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
+ (run_twice) ? fill(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
switch(output_stage.type)
{
@@ -438,6 +468,57 @@ protected:
SimpleTensor<TI> _reference{};
};
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+ {
+ // Accumulation is supported for Int8/UInt8 only in aarch64
+ bool accumulate = true;
+ // Accumulation is not supported for Int8/UInt8 in aarch32
+#ifdef __arm__
+ accumulate = false;
+#endif //__arm__
+ bool dynamic_qinfo = false;
+ const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+ const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset);
+ TensorFillInfo finfo;
+ _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
+ }
+
+protected:
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
+ {
+ const auto output_qinfo = QuantizationInfo();
+ return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32);
+ }
+
+ SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate)
+ {
+ SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo,
+ DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo);
+
+ SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32);
+ QuantizationInfo dst_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0);
+ f32_ref_output = reference::quantization_layer<int32_t, float>(s32_ref_output, DataType::F32, dst_quant_info);
+
+ if (accumulate)
+ {
+ SimpleTensor<float> output{ shape_output, DataType::F32, 1 };
+ fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, output, f32_ref_output, output, ConvertPolicy::SATURATE);
+ return output;
+ }
+
+ return f32_ref_output;
+ }
+
+ TensorType _target{};
+ SimpleTensor<float> _reference{};
+};
+
template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>
{
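On the reference side, the new GEMMLowpDequantizedMatrixMultiplyValidationFixture above converts the S32 accumulators to F32 by applying the combined scale scale_a * scale_b with a zero offset (the dst_quant_info it constructs). A standalone sketch of that conversion, with a hypothetical helper name not taken from the patch:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the fixture's reference path: S32 GEMM
// accumulators (input-offset contributions already applied) are mapped to
// F32 with the combined scale scale_a * scale_b and offset 0.
std::vector<float> dequantize_accumulators(const std::vector<int32_t> &acc, float scale_a, float scale_b)
{
    std::vector<float> out(acc.size());
    const float combined_scale = scale_a * scale_b;
    for (std::size_t i = 0; i < acc.size(); ++i)
    {
        out[i] = static_cast<float>(acc[i]) * combined_scale;
    }
    return out;
}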
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index 27665375c3..b76263bf95 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -80,6 +80,15 @@ SimpleTensor<Tout> quantization_layer(const SimpleTensor<Tin> &src, DataType out
dst[i] = quantize_qasymm16((src[i]), qinfo, rounding_policy);
}
break;
+ case DataType::F32:
+#if defined(_OPENMP)
+ #pragma omp parallel for
+#endif /* _OPENMP */
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ dst[i] = dequantize_s32((src[i]), qinfo);
+ }
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported output data type");
}
@@ -127,6 +136,7 @@ template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src,
template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<half> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
+template SimpleTensor<float> quantization_layer(const SimpleTensor<int32_t> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
} // namespace reference
} // namespace validation
} // namespace test
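As a usage note, the instantiation added above is what lets the new fixture convert its S32 reference output to F32. A minimal sketch of that call, assuming the test framework headers and the arm_compute::test::validation namespace, with illustrative scale values:

// Assumes tests/SimpleTensor.h and tests/validation/reference/QuantizationLayer.h.
SimpleTensor<int32_t> s32_result{ TensorShape(8U, 16U), DataType::S32, 1 };
// ... s32_result produced by the reference GEMMLowp ...

// Combined scale = scale_a * scale_b, zero offset, as in the new fixture.
const QuantizationInfo dst_qinfo(0.25f * 0.5f, 0);
SimpleTensor<float> f32_result =
    reference::quantization_layer<int32_t, float>(s32_result, DataType::F32, dst_qinfo);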