diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/AssetsLibrary.h | 29 | ||||
-rw-r--r-- | tests/datasets/GEMMLowpFusedOffsetOutputDataset.h | 32 | ||||
-rw-r--r-- | tests/validate_examples/cl_gemm.cpp | 7 | ||||
-rw-r--r-- | tests/validation/CL/ConvolutionLayer.cpp | 31 | ||||
-rw-r--r-- | tests/validation/CL/GEMMLowp.cpp | 24 | ||||
-rw-r--r-- | tests/validation/CL/WeightsReshape.cpp | 13 | ||||
-rw-r--r-- | tests/validation/NEON/GEMMLowp.cpp | 24 | ||||
-rw-r--r-- | tests/validation/fixtures/GEMMLowpFixture.h | 122 | ||||
-rw-r--r-- | tests/validation/reference/GEMMLowp.cpp | 71 | ||||
-rw-r--r-- | tests/validation/reference/GEMMLowp.h | 27 |
10 files changed, 277 insertions, 103 deletions
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h index f8635ea576..9a22b2fefb 100644 --- a/tests/AssetsLibrary.h +++ b/tests/AssetsLibrary.h @@ -216,6 +216,19 @@ public: /** Fills the specified @p raw tensor with random values drawn from @p * distribution. * + * @param[in, out] vec To be filled vector. + * @param[in] distribution Distribution used to fill the tensor. + * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator. + * + * @note The @p distribution has to provide operator(Generator &) which + * will be used to draw samples. + */ + template <typename T, typename D> + void fill(std::vector<T> &vec, D &&distribution, std::random_device::result_type seed_offset) const; + + /** Fills the specified @p raw tensor with random values drawn from @p + * distribution. + * * @param[in, out] raw To be filled raw. * @param[in] distribution Distribution used to fill the tensor. * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator. @@ -522,6 +535,22 @@ void AssetsLibrary::fill_boxes(T &&tensor, D &&distribution, std::random_device: } template <typename T, typename D> +void AssetsLibrary::fill(std::vector<T> &vec, D &&distribution, std::random_device::result_type seed_offset) const +{ + ARM_COMPUTE_ERROR_ON_MSG(vec.empty(), "Vector must not be empty"); + + using ResultType = typename std::remove_reference<D>::type::result_type; + + std::mt19937 gen(_seed + seed_offset); + for(size_t i = 0; i < vec.size(); ++i) + { + const ResultType value = distribution(gen); + + vec[i] = value; + } +} + +template <typename T, typename D> void AssetsLibrary::fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const { using ResultType = typename std::remove_reference<D>::type::result_type; diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h index c94019e3d5..cde1fe8978 100644 --- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h +++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h @@ -69,10 +69,22 @@ public: description << "b_offset=" << *_b_offset_it << ":"; description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":"; description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":"; - description << "output_multiplier=" << (*_output_stage_it).gemmlowp_multiplier << ":"; - description << "output_shift=" << (*_output_stage_it).gemmlowp_shift << ":"; + description << "output_multiplier={"; + for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it) + { + description << (*it) << ", "; + } + description << "}:"; + description << "output_shift={"; + + for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it) + { + description << (*it) << ", "; + } + description << "}:"; description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":"; description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":"; + description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":"; return description.str(); } @@ -132,6 +144,8 @@ public: output_stage.gemmlowp_shift = shift; output_stage.gemmlowp_min_bound = min; output_stage.gemmlowp_max_bound = max; + output_stage.gemmlowp_multipliers.push_back(multiplier); + output_stage.gemmlowp_shifts.push_back(shift); return output_stage; } @@ -172,12 +186,24 @@ public: } }; +class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedOffsetOutputPerChannelDataset() + { + add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210)); + add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); + add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); + add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + } +}; class LargeGEMMLowpFusedOffsetOutputDataset final : public GEMMLowpFusedOffsetOutputDataset { public: LargeGEMMLowpFusedOffsetOutputDataset() { - add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 18, 10, 210)); + add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 18, 10, 210)); add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210)); add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, 10, 210)); add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 18, 10, 210)); diff --git a/tests/validate_examples/cl_gemm.cpp b/tests/validate_examples/cl_gemm.cpp index 4e406cbd9b..128c5f6e7f 100644 --- a/tests/validate_examples/cl_gemm.cpp +++ b/tests/validate_examples/cl_gemm.cpp @@ -313,16 +313,19 @@ public: SimpleTensor<int32_t> ref_tmp_dst = reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(ref_src0, ref_src1, TensorShape(N, M, B), offset_src0, offset_src1); + const std::vector<int32_t> dst_multiplier_vec = { dst_multiplier }; + const std::vector<int32_t> dst_shift_vec = { dst_shift }; + if(add_bias) { SimpleTensor<int32_t> biases{ TensorShape(N), DataType::S32, 1 }; // Fill bias fill(biases, 3); - ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, biases, dst_multiplier, dst_shift, offset_dst); + ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, biases, dst_multiplier_vec, dst_shift_vec, offset_dst); } else { - ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, dst_multiplier, dst_shift, offset_dst); + ref_dst = reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(ref_tmp_dst, dst_multiplier_vec, dst_shift_vec, offset_dst); } validate(CLAccessor(dst), ref_dst); break; diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp index f1f9b59330..9eb6c6d41d 100644 --- a/tests/validation/CL/ConvolutionLayer.cpp +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -271,6 +271,8 @@ TEST_SUITE_END() // Float template <typename T> using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>; +template <typename T> +using CLGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, int8_t>; const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { @@ -285,7 +287,6 @@ const auto QuantizedActivationFunctionsSmallDataset = framework::dataset::make(" }); TEST_SUITE(Quantized) -TEST_SUITE(QASYMM8) const auto QuantizationData = framework::dataset::make("QuantizationInfo", { @@ -293,6 +294,7 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo", QuantizationInfo(0.3f, 3), QuantizationInfo(1.f, 10), }); +TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerReducedDataset(), @@ -317,6 +319,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedFixture<uint8_t> validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QSYMM8_PER_CHANNEL) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerReducedDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + QuantizationData), + QuantizedActivationFunctionsSmallDataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + QuantizationData), + QuantizedActivationFunctionsDataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized TEST_SUITE_END() // GEMMConvolutionLayer diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index f5bd871f90..39543b174c 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -86,13 +86,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework: using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>; TEST_SUITE(FusedOffsetOutput) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset()) +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -305,13 +307,17 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framewo 2) * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, 1073741825) * framework::dataset::make("result_shift", -3, - -2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", -3, - -1) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, + 1073741825) + * framework::dataset::make("result_shift", -3, + -2) + * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + +const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, + 254601602) + * framework::dataset::make("result_shift", -3, + -1) + * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>; diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp index 30c231d499..47cb975527 100644 --- a/tests/validation/CL/WeightsReshape.cpp +++ b/tests/validation/CL/WeightsReshape.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,22 +47,19 @@ using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>; // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::U8), // Unsupported data type - TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), // Mismatching data type + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), // Mismatching data type TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), }), - framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(4U), 1, DataType::U8), - TensorInfo(TensorShape(4U), 1, DataType::F16), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(4U), 1, DataType::F16), TensorInfo(TensorShape(4U), 1, DataType::QASYMM8), TensorInfo(TensorShape(4U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(4U, 19U), 1, DataType::U8), - TensorInfo(TensorShape(4U, 19U), 1, DataType::F16), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(4U, 19U), 1, DataType::F16), TensorInfo(TensorShape(4U, 19U), 1, DataType::QASYMM8), TensorInfo(TensorShape(4U, 19U), 1, DataType::F32), })), - framework::dataset::make("Expected", { false, false, false, true })), + framework::dataset::make("Expected", { false, false, true })), input_info, biases_info, output_info, expected) { bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info)); diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index d79374efa7..b79523da1a 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -147,13 +147,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework: using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; TEST_SUITE(FusedOffsetOutput) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset()) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference); @@ -417,13 +419,17 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::d const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, 1073741825) * framework::dataset::make("result_shift", -3, - -2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", -3, - -1) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, + 1073741825) + * framework::dataset::make("result_shift", -3, + -2) + * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + +const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, + 254601602) + * framework::dataset::make("result_shift", -3, + -1) + * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>; diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 8385221c78..5d092ecac2 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -47,23 +48,66 @@ namespace template <typename U> void fill(U &&tensor, int i) { - // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path - std::uniform_int_distribution<> distribution(1, 254); - library->fill(tensor, distribution, i); + switch(tensor.data_type()) + { + case DataType::QSYMM8_PER_CHANNEL: + { + int min_bound = 128; + int max_bound = -127; + for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++) + { + std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i); + if(bounds.first < min_bound) + { + min_bound = bounds.first; + } + if(bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound); + library->fill(tensor, distribution, i); + break; + } + case DataType::QASYMM8: + { + std::uniform_int_distribution<uint8_t> distribution(1, 254); + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + case DataType::F32: + { + // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } } template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false> TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo()) + GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo()) { // Create tensors TensorType a = create_tensor<TensorType>(shape_a, DataType::QASYMM8, 1); - TensorType b = create_tensor<TensorType>(shape_b, DataType::QASYMM8, 1); + TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated TensorType output = create_tensor<TensorType>(shape_output, output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : DataType::QASYMM8, 1); a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset)); - b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); + if(data_type_b == DataType::QSYMM8_PER_CHANNEL) + { + b.info()->set_quantization_info(b_qinfo); + } + else + { + b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); + } TensorType bias; if(is_fused) { @@ -101,14 +145,14 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); fill(AccessorType(bias), 2); } - // Compute GEMM function gemmlowp.run(); return output; } -template <bool reinterpret_input_as_3d> -SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) +template <bool reinterpret_input_as_3d, typename TW = uint8_t> +SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, + DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo()) { TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) @@ -119,13 +163,12 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con // Create reference SimpleTensor<uint8_t> a{ shape_a_to_use, DataType::QASYMM8, 1 }; - SimpleTensor<uint8_t> b{ shape_b, DataType::QASYMM8, 1 }; + SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) }; // Fill reference fill(a, 0); fill(b, 1); - - return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(a, b, shape_output, a_offset, b_offset); + return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t, TW>(a, b, shape_output, a_offset, b_offset); } } @@ -155,29 +198,50 @@ protected: SimpleTensor<int32_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false> +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TW = uint8_t> class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b) { ARM_COMPUTE_EXPECT(output_stage.type != GEMMLowpOutputStageType::NONE, framework::LogLevel::ERRORS); - _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage); - _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage); + if(data_type_b == DataType::QSYMM8_PER_CHANNEL) + { + output_stage.is_quantized_per_channel = true; + const size_t num_channels = shape_b[0]; + std::vector<float> scales(num_channels); + std::uniform_real_distribution<> distribution(0, 1); + library->fill(scales, distribution, 0); + output_stage.gemmlowp_multipliers.resize(num_channels); + output_stage.gemmlowp_shifts.resize(num_channels); + for(size_t i = 0; i < num_channels; ++i) + { + quantization::calculate_quantized_multiplier_less_than_one(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]); + } + + _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales)); + _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales)); + } + else + { + _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo()); + _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo()); + } } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, + DataType data_type_b, QuantizationInfo b_qinfo) { return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true>(shape_a, shape_b, shape_output, a_offset, b_offset, - output_stage); + output_stage, data_type_b, b_qinfo); } SimpleTensor<qasymm8_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage) + GEMMLowpOutputStageInfo output_stage, DataType data_type_b, QuantizationInfo b_qinfo) { - SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset); + SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TW>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_b, b_qinfo); TensorShape bias_shape(shape_b[0]); SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 }; @@ -187,11 +251,11 @@ protected: { case GEMMLowpOutputStageType::QUANTIZE_DOWN: return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(output, bias, - output_stage.gemmlowp_offset, output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); + output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(output, bias, - output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); + output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; default: ARM_COMPUTE_ERROR("Not Supported!"); @@ -276,16 +340,19 @@ protected: // Fill reference fill(a, 0); + const std::vector<int32_t> result_mult_int_vec = { result_mult_int }; + const std::vector<int32_t> result_shift_vec = { result_shift }; + if(add_bias) { // Fill bias fill(b, 1); - return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, b, result_offset, result_mult_int, result_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max); } else { - return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, result_offset, result_mult_int, result_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max); } } @@ -368,16 +435,19 @@ protected: // Fill reference fill(a, 0); + const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier }; + const std::vector<int32_t> result_shift_vec = { result_shift }; + if(add_bias) { // Fill bias fill(b, 1); - return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, b, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max); } else { - return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max); } } diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 4283cb5bac..08be4a5182 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -39,10 +39,11 @@ namespace reference namespace { template <typename T> -void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, - int32_t min, int32_t max) +void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int, + std::vector<int32_t> result_shift, int32_t min, int32_t max) { - const int cols_in = in->shape().x(); + const int cols_in = in->shape().x(); + const bool is_per_channel = result_mult_int.size() > 1; for(int i = 0; i < in->num_elements(); ++i) { @@ -53,9 +54,9 @@ void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleT result += (*bias)[i % cols_in]; } - result *= result_mult_int; + result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0]; - result >>= result_shift; + result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0]; // Bounded ReLu if(min != max) @@ -68,10 +69,11 @@ void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleT } template <typename T> -void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift, - int32_t result_offset_after_shift, int32_t min, int32_t max) +void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, std::vector<int32_t> result_fixedpoint_multiplier, + std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max) { - const int cols_in = in->shape().x(); + const int cols_in = in->shape().x(); + const bool is_per_channel = result_fixedpoint_multiplier.size() > 1; for(int i = 0; i < in->num_elements(); ++i) { @@ -83,7 +85,10 @@ void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, } // Fixed point multiplication - result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift); + const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0]; + const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0]; + + result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift); result += result_offset_after_shift; // Bounded ReLu @@ -132,8 +137,8 @@ void quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> *in, } } // namespace -template <typename T_out, typename T_in> -SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset) +template <typename T_out, typename T_in, typename T_in_1> +SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset) { static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output"); @@ -186,14 +191,15 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c } // used to validate assembly kernels which don't know anything about offsets -template <typename T1, typename T2> -SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c) +template <typename T1, typename T2, typename T3> +SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c) { - return gemmlowp_matrix_multiply_core<T1, T2>(a, b, shape_c, 0, 0); + return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0); } template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max) +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift, + int32_t min, int32_t max) { SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8); @@ -203,8 +209,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTe } template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, - int32_t min, int32_t max) +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int, + std::vector<int32_t> result_shift, int32_t min, int32_t max) { SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8); @@ -214,9 +220,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTe } template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift, - int32_t result_offset_after_shift, int32_t min, - int32_t max) +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift, + int32_t result_offset_after_shift, int32_t min, int32_t max) { SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8); @@ -226,8 +231,8 @@ SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint( } template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift, - int32_t result_offset_after_shift, int32_t min, int32_t max) +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, std::vector<int32_t> result_fixedpoint_multiplier, + std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max) { SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8); @@ -258,22 +263,24 @@ SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint( return dst; } -template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift, - int32_t result_offset_after_shift, int32_t min, int32_t max); -template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier, - int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max); +template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier, + std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max); +template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, + std::vector<int32_t> result_fixedpoint_multiplier, + std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max); template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max); template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max); -template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, - int32_t max); -template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, int32_t result_mult_int, - int32_t result_shift, int32_t min, int32_t max); +template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int, + std::vector<int32_t> result_shift, int32_t min, int32_t max); +template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int, + std::vector<int32_t> result_shift, int32_t min, int32_t max); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); -template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); -template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c); +template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); +template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c); +template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 5581f67652..815527e1b7 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -35,31 +35,32 @@ namespace validation { namespace reference { -template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0); -template <typename T1, typename T2> -SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template <typename T1, typename T2, typename T3> +SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); -template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift); +template <typename T1, typename T2, typename T3 = T2> +SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c); -template <typename T1, typename T2> -SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c); +template <typename T> +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift); template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift, int32_t min = 0, int32_t max = 0); template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift, - int32_t result_offset_after_shift, - int32_t min = 0, int32_t max = 0); +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int, + std::vector<int32_t> result_shift, int32_t min = 0, int32_t max = 0); template <typename T> -SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift, +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0); template <typename T> +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, std::vector<int32_t> result_fixedpoint_multiplier, + std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0); + +template <typename T> SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max); template <typename T> |