author    Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>  2019-11-04 14:42:08 +0000
committer Michele Di Giorgio <michele.digiorgio@arm.com>             2019-11-14 16:25:06 +0000
commit    951b8a4c01de2810349b6f16cf9bbba7578484fa (patch)
tree      8b3ab1c04279da7be3afd6632a9894b6197c1e1b /tests
parent    cd4e9abf7a165f15ccd10ac4541365d4f8a6db19 (diff)
COMPMID-2309: CLConvolutionLayer: support QUANT8_SYMM_PER_CHANNEL filters

Change-Id: I16f6758b768ede404a064db057302ded706e1e8a
Signed-off-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2215
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests')
-rw-r--r--  tests/AssetsLibrary.h                               29
-rw-r--r--  tests/datasets/GEMMLowpFusedOffsetOutputDataset.h   32
-rw-r--r--  tests/validate_examples/cl_gemm.cpp                  7
-rw-r--r--  tests/validation/CL/ConvolutionLayer.cpp            31
-rw-r--r--  tests/validation/CL/GEMMLowp.cpp                    24
-rw-r--r--  tests/validation/CL/WeightsReshape.cpp              13
-rw-r--r--  tests/validation/NEON/GEMMLowp.cpp                  24
-rw-r--r--  tests/validation/fixtures/GEMMLowpFixture.h        122
-rw-r--r--  tests/validation/reference/GEMMLowp.cpp             71
-rw-r--r--  tests/validation/reference/GEMMLowp.h               27
10 files changed, 277 insertions, 103 deletions
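Editorial note: the fixture changes below (tests/validation/fixtures/GEMMLowpFixture.h) turn each per-channel float scale into a fixed-point (multiplier, shift) pair via quantization::calculate_quantized_multiplier_less_than_one. A minimal C++ sketch of that conversion, assuming the usual gemmlowp-style decomposition; the helper name here is illustrative, not the library API:

    #include <cmath>
    #include <cstdint>

    // Decompose a real scale in (0, 1) into a Q31 multiplier and a right shift,
    // so that scale ~= (multiplier / 2^31) * 2^-right_shift.
    void quantize_multiplier_smaller_than_one(float scale, int32_t *multiplier, int32_t *right_shift)
    {
        int          exponent = 0;
        const double mantissa = std::frexp(scale, &exponent); // scale = mantissa * 2^exponent, mantissa in [0.5, 1)
        *right_shift          = -exponent;
        int64_t q             = static_cast<int64_t>(std::lround(mantissa * (1ll << 31)));
        if(q == (1ll << 31)) // rounding pushed the mantissa up to 1.0: renormalise
        {
            q /= 2;
            --(*right_shift);
        }
        *multiplier = static_cast<int32_t>(q);
    }

The fixture applies this once per output channel of the b matrix to populate GEMMLowpOutputStageInfo::gemmlowp_multipliers and gemmlowp_shifts.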
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index f8635ea576..9a22b2fefb 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -216,6 +216,19 @@ public:
/** Fills the specified @p raw tensor with random values drawn from @p
* distribution.
*
+ * @param[in, out] vec To be filled vector.
+ * @param[in] distribution Distribution used to fill the tensor.
+ * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator.
+ *
+ * @note The @p distribution has to provide operator(Generator &) which
+ * will be used to draw samples.
+ */
+ template <typename T, typename D>
+ void fill(std::vector<T> &vec, D &&distribution, std::random_device::result_type seed_offset) const;
+
+ /** Fills the specified @p raw tensor with random values drawn from @p
+ * distribution.
+ *
* @param[in, out] raw To be filled raw.
* @param[in] distribution Distribution used to fill the tensor.
* @param[in] seed_offset The offset will be added to the global seed before initialising the random generator.
@@ -522,6 +535,22 @@ void AssetsLibrary::fill_boxes(T &&tensor, D &&distribution, std::random_device:
}
template <typename T, typename D>
+void AssetsLibrary::fill(std::vector<T> &vec, D &&distribution, std::random_device::result_type seed_offset) const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(vec.empty(), "Vector must not be empty");
+
+ using ResultType = typename std::remove_reference<D>::type::result_type;
+
+ std::mt19937 gen(_seed + seed_offset);
+ for(size_t i = 0; i < vec.size(); ++i)
+ {
+ const ResultType value = distribution(gen);
+
+ vec[i] = value;
+ }
+}
+
+template <typename T, typename D>
void AssetsLibrary::fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const
{
using ResultType = typename std::remove_reference<D>::type::result_type;
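Editorial note: a usage sketch for the vector overload of AssetsLibrary::fill() added above, in the shape it is used by GEMMLowpFixture.h later in this patch (library, shape_b and the distribution come from that fixture):

    // Draw one random scale in [0, 1) per output channel of the weights;
    // the seed offset (0) keeps target and reference fills reproducible.
    const size_t num_channels = shape_b[0];
    std::vector<float> scales(num_channels);
    std::uniform_real_distribution<> distribution(0, 1);
    library->fill(scales, distribution, 0);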
diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
index c94019e3d5..cde1fe8978 100644
--- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
+++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
@@ -69,10 +69,22 @@ public:
description << "b_offset=" << *_b_offset_it << ":";
description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":";
description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":";
- description << "output_multiplier=" << (*_output_stage_it).gemmlowp_multiplier << ":";
- description << "output_shift=" << (*_output_stage_it).gemmlowp_shift << ":";
+ description << "output_multiplier={";
+ for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it)
+ {
+ description << (*it) << ", ";
+ }
+ description << "}:";
+ description << "output_shift={";
+
+ for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it)
+ {
+ description << (*it) << ", ";
+ }
+ description << "}:";
description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":";
description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":";
+ description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":";
return description.str();
}
@@ -132,6 +144,8 @@ public:
output_stage.gemmlowp_shift = shift;
output_stage.gemmlowp_min_bound = min;
output_stage.gemmlowp_max_bound = max;
+ output_stage.gemmlowp_multipliers.push_back(multiplier);
+ output_stage.gemmlowp_shifts.push_back(shift);
return output_stage;
}
@@ -172,12 +186,24 @@ public:
}
};
+class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset
+{
+public:
+ SmallGEMMLowpFusedOffsetOutputPerChannelDataset()
+ {
+ add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210));
+ add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210));
+ add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210));
+ add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210));
+ add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210));
+ }
+};
class LargeGEMMLowpFusedOffsetOutputDataset final : public GEMMLowpFusedOffsetOutputDataset
{
public:
LargeGEMMLowpFusedOffsetOutputDataset()
{
- add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 18, 10, 210));
+ add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 18, 10, 210));
add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210));
add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, 10, 210));
add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 18, 10, 210));
diff --git a/tests/validate_examples/cl_gemm.cpp b/tests/validate_examples/cl_gemm.cpp
index 4e406cbd9b..128c5f6e7f 100644
--- a/tests/validate_examples/cl_gemm.cpp
+++ b/tests/validate_examples/cl_gemm.cpp
@@ -313,16 +313,19 @@ public:
SimpleTensor<int32_t> ref_tmp_dst = reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(ref_src0, ref_src1, TensorShape(N, M, B), offset_src0, offset_src1);
+ const std::vector<int32_t> dst_multiplier_vec = { dst_multiplier };
+ const std::vector<int32_t> dst_shift_vec = { dst_shift };
+
if(add_bias)
{
SimpleTensor<int32_t> biases{ TensorShape(N), DataType::S32, 1 };
// Fill bias
fill(biases, 3);
- ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, biases, dst_multiplier, dst_shift, offset_dst);
+ ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, biases, dst_multiplier_vec, dst_shift_vec, offset_dst);
}
else
{
- ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, dst_multiplier, dst_shift, offset_dst);
+ ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, dst_multiplier_vec, dst_shift_vec, offset_dst);
}
validate(CLAccessor(dst), ref_dst);
break;
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index f1f9b59330..9eb6c6d41d 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -271,6 +271,8 @@ TEST_SUITE_END() // Float
template <typename T>
using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
+template <typename T>
+using CLGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, int8_t>;
const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
@@ -285,7 +287,6 @@ const auto QuantizedActivationFunctionsSmallDataset = framework::dataset::make("
});
TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
const auto QuantizationData = framework::dataset::make("QuantizationInfo",
{
@@ -293,6 +294,7 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo",
QuantizationInfo(0.3f, 3),
QuantizationInfo(1.f, 10),
});
+TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerReducedDataset(),
@@ -317,6 +319,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QSYMM8_PER_CHANNEL)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerReducedDataset(),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ QuantizationData),
+ QuantizedActivationFunctionsSmallDataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ QuantizationData),
+ QuantizedActivationFunctionsDataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QSYMM8_PER_CHANNEL
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // GEMMConvolutionLayer
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index f5bd871f90..39543b174c 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -86,13 +86,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework:
using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
TEST_SUITE(FusedOffsetOutput)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset())
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputDataset(),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset())
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputDataset(),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -305,13 +307,17 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framewo
2)
* framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, 1073741825) * framework::dataset::make("result_shift", -3,
- -2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", -3,
- -1)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823,
+ 1073741825)
+ * framework::dataset::make("result_shift", -3,
+ -2)
+ * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
+ 254601602)
+ * framework::dataset::make("result_shift", -3,
+ -1)
+ * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
index 30c231d499..47cb975527 100644
--- a/tests/validation/CL/WeightsReshape.cpp
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,22 +47,19 @@ using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>;
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::U8), // Unsupported data type
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), // Mismatching data type
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), // Mismatching data type
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
}),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(4U), 1, DataType::U8),
- TensorInfo(TensorShape(4U), 1, DataType::F16),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(4U), 1, DataType::F16),
TensorInfo(TensorShape(4U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(4U, 19U), 1, DataType::U8),
- TensorInfo(TensorShape(4U, 19U), 1, DataType::F16),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(4U, 19U), 1, DataType::F16),
TensorInfo(TensorShape(4U, 19U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(4U, 19U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { false, false, false, true })),
+ framework::dataset::make("Expected", { false, false, true })),
input_info, biases_info, output_info, expected)
{
bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info));
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index d79374efa7..b79523da1a 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -147,13 +147,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework:
using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
TEST_SUITE(FusedOffsetOutput)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset())
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputDataset(),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset())
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputDataset(),
+ framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -417,13 +419,17 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::d
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
* framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, 1073741825) * framework::dataset::make("result_shift", -3,
- -2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", -3,
- -1)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823,
+ 1073741825)
+ * framework::dataset::make("result_shift", -3,
+ -2)
+ * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
+ 254601602)
+ * framework::dataset::make("result_shift", -3,
+ -1)
+ * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 8385221c78..5d092ecac2 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
@@ -47,23 +48,66 @@ namespace
template <typename U>
void fill(U &&tensor, int i)
{
- // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
- std::uniform_int_distribution<> distribution(1, 254);
- library->fill(tensor, distribution, i);
+ switch(tensor.data_type())
+ {
+ case DataType::QSYMM8_PER_CHANNEL:
+ {
+ int min_bound = 128;
+ int max_bound = -127;
+ for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
+ {
+ std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
+ if(bounds.first < min_bound)
+ {
+ min_bound = bounds.first;
+ }
+ if(bounds.second > max_bound)
+ {
+ max_bound = bounds.second;
+ }
+ }
+ std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::QASYMM8:
+ {
+ std::uniform_int_distribution<uint8_t> distribution(1, 254);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F16:
+ case DataType::F32:
+ {
+ // Values between -1 and 1 for the floating-point case
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
}
template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false>
TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo())
+ GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
{
// Create tensors
TensorType a = create_tensor<TensorType>(shape_a, DataType::QASYMM8, 1);
- TensorType b = create_tensor<TensorType>(shape_b, DataType::QASYMM8, 1);
+ TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1); // GEMM output before the output stage mismatches if data_layout_output is passed here; to be investigated
TensorType output = create_tensor<TensorType>(shape_output, output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : DataType::QASYMM8, 1);
a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
- b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+ if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
+ {
+ b.info()->set_quantization_info(b_qinfo);
+ }
+ else
+ {
+ b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+ }
TensorType bias;
if(is_fused)
{
@@ -101,14 +145,14 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
fill(AccessorType(bias), 2);
}
-
// Compute GEMM function
gemmlowp.run();
return output;
}
-template <bool reinterpret_input_as_3d>
-SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+template <bool reinterpret_input_as_3d, typename TW = uint8_t>
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
+ DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
{
TensorShape shape_a_to_use = shape_a;
if(reinterpret_input_as_3d)
@@ -119,13 +163,12 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con
// Create reference
SimpleTensor<uint8_t> a{ shape_a_to_use, DataType::QASYMM8, 1 };
- SimpleTensor<uint8_t> b{ shape_b, DataType::QASYMM8, 1 };
+ SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
// Fill reference
fill(a, 0);
fill(b, 1);
-
- return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(a, b, shape_output, a_offset, b_offset);
+ return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t, TW>(a, b, shape_output, a_offset, b_offset);
}
}
@@ -155,29 +198,50 @@ protected:
SimpleTensor<int32_t> _reference{};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TW = uint8_t>
class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
{
public:
template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
{
ARM_COMPUTE_EXPECT(output_stage.type != GEMMLowpOutputStageType::NONE, framework::LogLevel::ERRORS);
- _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage);
- _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage);
+ if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
+ {
+ output_stage.is_quantized_per_channel = true;
+ const size_t num_channels = shape_b[0];
+ std::vector<float> scales(num_channels);
+ std::uniform_real_distribution<> distribution(0, 1);
+ library->fill(scales, distribution, 0);
+ output_stage.gemmlowp_multipliers.resize(num_channels);
+ output_stage.gemmlowp_shifts.resize(num_channels);
+ for(size_t i = 0; i < num_channels; ++i)
+ {
+ quantization::calculate_quantized_multiplier_less_than_one(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
+ }
+
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales));
+ _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales));
+ }
+ else
+ {
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo());
+ _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo());
+ }
}
protected:
- TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
+ DataType data_type_b, QuantizationInfo b_qinfo)
{
return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true>(shape_a, shape_b, shape_output, a_offset, b_offset,
- output_stage);
+ output_stage, data_type_b, b_qinfo);
}
SimpleTensor<qasymm8_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage)
+ GEMMLowpOutputStageInfo output_stage, DataType data_type_b, QuantizationInfo b_qinfo)
{
- SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset);
+ SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TW>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_b, b_qinfo);
TensorShape bias_shape(shape_b[0]);
SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
@@ -187,11 +251,11 @@ protected:
{
case GEMMLowpOutputStageType::QUANTIZE_DOWN:
return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(output, bias,
- output_stage.gemmlowp_offset, output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
+ output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
break;
case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(output, bias,
- output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
+ output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
break;
default:
ARM_COMPUTE_ERROR("Not Supported!");
@@ -276,16 +340,19 @@ protected:
// Fill reference
fill(a, 0);
+ const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
+ const std::vector<int32_t> result_shift_vec = { result_shift };
+
if(add_bias)
{
// Fill bias
fill(b, 1);
- return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, b, result_offset, result_mult_int, result_shift, min, max);
+ return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
}
else
{
- return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, result_offset, result_mult_int, result_shift, min, max);
+ return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
}
}
@@ -368,16 +435,19 @@ protected:
// Fill reference
fill(a, 0);
+ const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
+ const std::vector<int32_t> result_shift_vec = { result_shift };
+
if(add_bias)
{
// Fill bias
fill(b, 1);
- return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, b, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max);
+ return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
}
else
{
- return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max);
+ return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
}
}
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 4283cb5bac..08be4a5182 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -39,10 +39,11 @@ namespace reference
namespace
{
template <typename T>
-void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
- int32_t min, int32_t max)
+void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
+ std::vector<int32_t> result_shift, int32_t min, int32_t max)
{
- const int cols_in = in->shape().x();
+ const int cols_in = in->shape().x();
+ const bool is_per_channel = result_mult_int.size() > 1;
for(int i = 0; i < in->num_elements(); ++i)
{
@@ -53,9 +54,9 @@ void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleT
result += (*bias)[i % cols_in];
}
- result *= result_mult_int;
+ result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
- result >>= result_shift;
+ result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
// Bounded ReLu
if(min != max)
@@ -68,10 +69,11 @@ void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleT
}
template <typename T>
-void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift,
- int32_t result_offset_after_shift, int32_t min, int32_t max)
+void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
+ std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
{
- const int cols_in = in->shape().x();
+ const int cols_in = in->shape().x();
+ const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
for(int i = 0; i < in->num_elements(); ++i)
{
@@ -83,7 +85,10 @@ void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in,
}
// Fixed point multiplication
- result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift);
+ const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
+ const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
+
+ result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
result += result_offset_after_shift;
// Bounded ReLu
@@ -132,8 +137,8 @@ void quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> *in,
}
} // namespace
-template <typename T_out, typename T_in>
-SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
+template <typename T_out, typename T_in, typename T_in_1>
+SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
{
static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
@@ -186,14 +191,15 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c
}
// used to validate assembly kernels which don't know anything about offsets
-template <typename T1, typename T2>
-SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c)
+template <typename T1, typename T2, typename T3>
+SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
{
- return gemmlowp_matrix_multiply_core<T1, T2>(a, b, shape_c, 0, 0);
+ return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
}
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max)
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
+ int32_t min, int32_t max)
{
SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
@@ -203,8 +209,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTe
}
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
- int32_t min, int32_t max)
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
+ std::vector<int32_t> result_shift, int32_t min, int32_t max)
{
SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
@@ -214,9 +220,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTe
}
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
- int32_t result_offset_after_shift, int32_t min,
- int32_t max)
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
+ int32_t result_offset_after_shift, int32_t min, int32_t max)
{
SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
@@ -226,8 +231,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(
}
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
- int32_t result_offset_after_shift, int32_t min, int32_t max)
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
+ std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
{
SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
@@ -258,22 +263,24 @@ SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(
return dst;
}
-template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
- int32_t result_offset_after_shift, int32_t min, int32_t max);
-template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
- int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
+ std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
+ std::vector<int32_t> result_fixedpoint_multiplier,
+ std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
int32_t result_shift, int32_t min, int32_t max);
-template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min,
- int32_t max);
-template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, int32_t result_mult_int,
- int32_t result_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
+ std::vector<int32_t> result_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
+ std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
-template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
-template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
+template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
+template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
+template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
index 5581f67652..815527e1b7 100644
--- a/tests/validation/reference/GEMMLowp.h
+++ b/tests/validation/reference/GEMMLowp.h
@@ -35,31 +35,32 @@ namespace validation
{
namespace reference
{
-template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0);
-template <typename T1, typename T2>
-SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
+template <typename T1, typename T2, typename T3>
+SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
-template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift);
+template <typename T1, typename T2, typename T3 = T2>
+SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c);
-template <typename T1, typename T2>
-SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c);
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift);
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
int32_t min = 0, int32_t max = 0);
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
- int32_t result_offset_after_shift,
- int32_t min = 0, int32_t max = 0);
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
+ std::vector<int32_t> result_shift, int32_t min = 0, int32_t max = 0);
template <typename T>
-SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0);
template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
+ std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0);
+
+template <typename T>
SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
int32_t min, int32_t max);
template <typename T>