From 11ab45148a69f76e7821fa5e0670e4bacb05c776 Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Tue, 7 Nov 2023 12:04:59 +0000
Subject: Implement dynamic quantization for GEMMLowp tests

This patch calculates the output quantization info based on the inputs'
quantization information. The previous approach used the same quantization
information for input, weights and output.

Remove the QSYMM8_PER_CHANNEL path from the fixture, as there are no
related tests.

Remove repeated shapes from the dataset, now that the quantization info
has been removed from the dataset.

Combine the signed and unsigned SmallGEMMLowpFusedBatchedMatMulDataset
into one, as they became identical.

Resolves COMPMID-6481, COMPMID-6634

Change-Id: I9f5a20f4bb45c3e5adab388564135ae8a5c0a9ea
Signed-off-by: SiCong Li
Signed-off-by: Gunes Bayir
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10680
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Jakub Sujak
Benchmark: Arm Jenkins
---
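Note (illustrative background, not part of the change itself): the dynamically
derived output stage feeds the standard GEMMLowp requantization, in which every
S32 accumulator is rescaled by an integer multiplier and shift obtained from
the real multiplier (scale_a * scale_b) / scale_out. A minimal sketch of that
decomposition, mirroring what quantization::calculate_quantized_multiplier
computes for multipliers below 1 (the helper name and edge handling here are
illustrative only):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Split a real requantization multiplier (scale_a * scale_b / scale_out)
    // into a Q0.31 fixed-point multiplier and a right shift.
    void decompose_multiplier(float multiplier, int32_t *quant_mult, int32_t *right_shift)
    {
        assert(multiplier > 0.f && multiplier < 1.f);
        int exponent = 0;
        // multiplier = significand * 2^exponent, with significand in [0.5, 1)
        const double significand = std::frexp(multiplier, &exponent);
        int64_t q = static_cast<int64_t>(std::llround(significand * (1ll << 31)));
        if(q == (1ll << 31)) // rounding may push the significand up to 1.0
        {
            q /= 2;
            ++exponent;
        }
        *quant_mult  = static_cast<int32_t>(q);
        *right_shift = -exponent; // a value of -1 denotes a left shift by one
    }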
 tests/datasets/GEMMLowpFusedOffsetOutputDataset.h | 210 +++++---------
 tests/validation/CL/GEMMLowp.cpp                  | 110 ++++----
 tests/validation/NEON/GEMMLowp.cpp                |  41 +--
 tests/validation/fixtures/GEMMLowpFixture.h       | 319 +++++++++++++---------
 4 files changed, 333 insertions(+), 347 deletions(-)
(limited to 'tests')

diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
index 8c90efcbdd..b0ad4879ba 100644
--- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
+++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
-#define ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
+#ifndef ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H
+#define ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H

 #include "utils/TypePrinter.h"

@@ -40,21 +40,17 @@ namespace datasets
 class GEMMLowpFusedOffsetOutputDataset
 {
 public:
-    using type = std::tuple<TensorShape, TensorShape, TensorShape, int32_t, int32_t, GEMMLowpOutputStageInfo>;
+    using type = std::tuple<TensorShape, TensorShape, TensorShape, GEMMLowpOutputStageType>;

     struct iterator
     {
         iterator(std::vector<TensorShape>::const_iterator             a_it,
                  std::vector<TensorShape>::const_iterator             b_it,
                  std::vector<TensorShape>::const_iterator             c_it,
-                 std::vector<int32_t>::const_iterator                 a_offset_it,
-                 std::vector<int32_t>::const_iterator                 b_offset_it,
-                 std::vector<GEMMLowpOutputStageInfo>::const_iterator output_stage_it)
+                 std::vector<GEMMLowpOutputStageType>::const_iterator output_stage_it)
             : _a_it{ std::move(a_it) },
               _b_it{ std::move(b_it) },
               _c_it{ std::move(c_it) },
-              _a_offset_it{ std::move(a_offset_it) },
-              _b_offset_it{ std::move(b_offset_it) },
               _output_stage_it{ std::move(output_stage_it) }
         {
         }

@@ -65,33 +61,14 @@ public:
             description << "A=" << *_a_it << ":";
             description << "B=" << *_b_it << ":";
             description << "C=" << *_c_it << ":";
-            description << "a_offset=" << *_a_offset_it << ":";
-            description << "b_offset=" << *_b_offset_it << ":";
-            description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":";
-            description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":";
-            description << "output_multiplier={";
-            for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_shift={";
-
-            for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":";
-            description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":";
-            description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":";
+            description << "output_type=" << string_from_gemmlowp_output_stage(*_output_stage_it) << ":";

             return description.str();
         }

         GEMMLowpFusedOffsetOutputDataset::type operator*() const
         {
-            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_a_offset_it, *_b_offset_it, *_output_stage_it);
+            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_output_stage_it);
         }

         iterator &operator++()
@@ -99,8 +76,6 @@ public:
             ++_a_it;
             ++_b_it;
             ++_c_it;
-            ++_a_offset_it;
-            ++_b_offset_it;
             ++_output_stage_it;

             return *this;
@@ -110,45 +85,27 @@ public:
         std::vector<TensorShape>::const_iterator             _a_it;
         std::vector<TensorShape>::const_iterator             _b_it;
         std::vector<TensorShape>::const_iterator             _c_it;
-        std::vector<int32_t>::const_iterator                 _a_offset_it;
-        std::vector<int32_t>::const_iterator                 _b_offset_it;
-        std::vector<GEMMLowpOutputStageInfo>::const_iterator _output_stage_it;
+        std::vector<GEMMLowpOutputStageType>::const_iterator _output_stage_it;
     };

     iterator begin() const
     {
-        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _a_offset.begin(), _b_offset.begin(), _output_stage.begin());
+        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _output_stage.begin());
     }

     int size() const
     {
-        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), std::min(_a_offset.size(), std::min(_b_offset.size(), _output_stage.size())))));
+        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), _output_stage.size())));
     }
-    void add_config(TensorShape a, TensorShape b, TensorShape c, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+    void add_config(TensorShape a, TensorShape b, TensorShape c, GEMMLowpOutputStageType output_stage)
     {
         _a_shapes.emplace_back(std::move(a));
         _b_shapes.emplace_back(std::move(b));
         _c_shapes.emplace_back(std::move(c));
-        _a_offset.emplace_back(std::move(a_offset));
-        _b_offset.emplace_back(std::move(b_offset));
         _output_stage.emplace_back(std::move(output_stage));
     }

-    GEMMLowpOutputStageInfo OutputStageInfo(GEMMLowpOutputStageType type, int32_t offset, int32_t multiplier, int32_t shift, int32_t min, int32_t max)
-    {
-        GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
-        output_stage.type                    = type;
-        output_stage.gemmlowp_offset         = offset;
-        output_stage.gemmlowp_multiplier     = multiplier;
-        output_stage.gemmlowp_shift          = shift;
-        output_stage.gemmlowp_min_bound      = min;
-        output_stage.gemmlowp_max_bound      = max;
-        output_stage.gemmlowp_multipliers.push_back(multiplier);
-        output_stage.gemmlowp_shifts.push_back(shift);
-        return output_stage;
-    }
-
protected:
    GEMMLowpFusedOffsetOutputDataset()                                    = default;
    GEMMLowpFusedOffsetOutputDataset(GEMMLowpFusedOffsetOutputDataset &&) = default;
@@ -157,9 +114,7 @@ private:
    std::vector<TensorShape> _a_shapes{};
    std::vector<TensorShape> _b_shapes{};
    std::vector<TensorShape> _c_shapes{};
-    std::vector<int32_t> _a_offset{};
-    std::vector<int32_t> _b_offset{};
-    std::vector<GEMMLowpOutputStageInfo> _output_stage{};
+    std::vector<GEMMLowpOutputStageType> _output_stage{};
 };

class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff
 public:
    SmallGEMMLowpFusedOffsetOutputUint8Dataset()
    {
-        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-
-        add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 10, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, 10, 210));
+        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U,
72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedBatchedMatMulDataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned() + SmallGEMMLowpFusedBatchedMatMulDataset() { - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, 0, 254)); - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, 0, 254)); - add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, 20, 210)); - add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, 14, 210)); - add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, 0, 127)); - add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, 10, 210)); - add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 3, 10, 210)); - add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, 50, 225)); - } -}; - -class SmallGEMMLowpFusedBatchedMatMulDatasetSigned final : public GEMMLowpFusedOffsetOutputDataset -{ -public: - SmallGEMMLowpFusedBatchedMatMulDatasetSigned() - { - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, -128, 127)); - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, -128, 127)); - add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, -108, 127)); - add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, -98, 107)); - add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, -127, 64)); - add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, -64, 127)); - 
add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), 3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -127, 110)); - add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -77, 115)); + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -216,14 +152,12 @@ class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowp public: SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210)); - add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, - 210)); - add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, - 210)); + add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), 
TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -232,14 +166,12 @@ class SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset final : public GEM public: SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210)); - add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, - 210)); - add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, - 210)); + add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -248,28 +180,14 @@ class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: SmallGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 13, -10, 110)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -40, 2, 13, -10, 110)); - - add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 10, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, 
OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 10, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 10, -10, 110)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, -10, 110)); - } -}; - -class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset -{ -public: - SmallGEMMLowpFusedOffsetOutputPerChannelDataset() - { - add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210)); - add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -278,15 +196,12 @@ class LargeGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: LargeGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 18, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 18, 10, 210)); - add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210)); + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 
429U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 15, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 15, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, 10, 210)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 15, 10, 210)); } }; @@ -295,18 +210,17 @@ class LargeGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: LargeGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - - add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 15, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 15, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 15, -10, 110)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, -10, 110)); + add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + 
add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET */ +#endif // ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 0b057b9dce..1ae9e96626 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,9 @@ namespace test { namespace validation { + +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ @@ -72,9 +75,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { validate(CLAccessor(_target), _reference, tolerance_quant); } @@ -84,9 +87,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { validate(CLAccessor(_target), _reference, tolerance_quant); } @@ -96,9 +99,10 @@ TEST_SUITE_END() // BatchedMatMul TEST_SUITE(FusedOffsetOutput) TEST_SUITE(QASYMM8) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { 
DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -108,9 +112,9 @@ TEST_SUITE(Output3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) + combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -121,18 +125,19 @@ TEST_SUITE(InputOutput3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) + combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); } TEST_SUITE_END() // InputOutput3D -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -141,8 +146,9 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), + make("DataType", { DataType::QASYMM8_SIGNED }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -185,24 +191,24 @@ TEST_SUITE(QuantizeDownInt32Scale) TEST_SUITE(QASYMM8) -const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * 
framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", 0) * make("max", 255) * make("addBias", { false, true }); -const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -213,24 +219,24 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", -128) * make("max", 127) * make("addBias", { false, true }); -const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, 
CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -247,13 +253,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", 0)), - framework::dataset::make("max", 255)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", 0), + make("max", 255), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -264,13 +271,14 @@ TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", -128)), - framework::dataset::make("max", 127)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8_SIGNED), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", -128), + make("max", 127), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 46058bd148..9c4d1741eb 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,9 +50,12 @@ namespace validation TEST_SUITE(NEON) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) + using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; +using framework::dataset::make; + DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()), shape_a, shape_b, shape_c, a_offset, b_offset) { @@ -80,26 +83,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), + }), + make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 11U), 1, DataType::S32), TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), - })), - framework::dataset::make("Expected", { true, false, false, false, true })), + }), + make("Expected", { true, false, false, false, true })), a_info, b_info, output_info, expected) { // Lock tensors @@ -231,9 +234,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { validate(Accessor(_target), _reference, tolerance_batched); } @@ -243,9 +246,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; 
TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { validate(Accessor(_target), _reference, tolerance_batched); } @@ -256,15 +259,17 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMulti constexpr AbsoluteTolerance tolerance_quant(1); TEST_SUITE(FusedOffsetOutput) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 1492ac6945..a65a1e6bd8 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
 */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
-#define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H

 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
+#include "tests/validation/Helpers.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/reference/GEMMLowp.h"

+#include <cstdint>
+#include <random>
+
 namespace arm_compute
 {
 namespace test
 {
@@ -37,82 +42,46 @@ namespace validation
 {
 namespace
 {
+
 template <typename U>
 void fill(U &&tensor, int i)
 {
-    switch(tensor.data_type())
-    {
-        case DataType::QSYMM8_PER_CHANNEL:
-        {
-            int min_bound = 128;
-            int max_bound = -127;
-            for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
-            {
-                std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
-                if(bounds.first < min_bound)
-                {
-                    min_bound = bounds.first;
-                }
-                if(bounds.second > max_bound)
-                {
-                    max_bound = bounds.second;
-                }
-            }
-            std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::QASYMM8:
-        {
-            std::uniform_int_distribution<uint8_t> distribution(1, 254);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::S32:
-        {
-            std::uniform_int_distribution<int32_t> distribution(-20000, 20000);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F16:
-        {
-            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F32:
-        {
-            std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        default:
-            library->fill_tensor_uniform(tensor, i);
-    }
+    ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type()));
+    library->fill_tensor_uniform(tensor, i);
 }

+template <typename U>
+void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max)
+{
+    ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32);
+    std::uniform_int_distribution<int32_t> distribution(min, max);
+    library->fill(tensor, distribution, i);
+}
+
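The S32 bias range is now a parameter instead of the fixed [-20000, 20000] of
the old switch, so each test can pick bounds the accumulator tolerates. For
illustration only (the fixture itself goes through library->fill), a
standalone equivalent of the bounded bias fill might look like:

    #include <cstdint>
    #include <random>
    #include <vector>

    // Draw 'len' bias values uniformly from [min, max]. The seed stands in for
    // the fixture's (i + finfo.hash) argument: a different seed per tensor and
    // per test configuration decorrelates the generated data.
    std::vector<int32_t> make_bias(std::size_t len, std::uint32_t seed, int32_t min, int32_t max)
    {
        std::mt19937                           gen(seed);
        std::uniform_int_distribution<int32_t> dist(min, max);
        std::vector<int32_t>                   bias(len);
        for(auto &v : bias)
        {
            v = dist(gen);
        }
        return bias;
    }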
+/** Information about how to fill tensors */
+struct TensorFillInfo
+{
+    // Bias fill range. Default values are arbitrary
+    int32_t min_bias {-20000};
+    int32_t max_bias {20000};
+    // Optional extra hash to randomize tensor filling
+    int32_t hash {0};
+};
+
 template 
-TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
-                                   QuantizationInfo b_qinfo = QuantizationInfo(), bool reshape_b_only_on_first_run = false)
+TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                   const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
+                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo() )
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     // Create tensors
-    DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
+    const DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;

-    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1);
-    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
-    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
+    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1, a_qinfo);
+    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1, b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);

-    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
-
-    if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-    {
-        b.info()->set_quantization_info(b_qinfo);
-    }
-    else
-    {
-        b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
-    }
     TensorType bias;
     if(is_fused)
     {
@@ -142,26 +111,26 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
     ARM_COMPUTE_ASSERT(!output.info()->is_resizable());

     // Fill tensors
-    fill(AccessorType(a), 0);
-    fill(AccessorType(b), 1);
+    fill(AccessorType(a), 0 + finfo.hash);
+    fill(AccessorType(b), 1 + finfo.hash);

     if(is_fused)
     {
         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
         bias.allocator()->allocate();
         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
-        fill(AccessorType(bias), 2);
+        fill_bias_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
     }

     // Run with variable inputs.
     if(run_twice)
     {
         gemmlowp.run();
-        fill(AccessorType(a), 3); // Fill tensors with new seed after run
-        fill(AccessorType(b), 4);
+        fill(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+        fill(AccessorType(b), 4 + finfo.hash);
         if(is_fused)
         {
-            fill(AccessorType(bias), 5);
+            fill_bias_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }
@@ -171,9 +140,11 @@
 }

 template 
-SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo())
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     TensorShape shape_a_to_use = shape_a;
     if(reinterpret_input_as_3d)
     {
@@ -182,8 +153,8 @@
     }

     // Create reference
-    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
-    SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b{ shape_b, data_type_b, 1, b_qinfo };

     TensorShape shape_a_to_use_transposed{ shape_a_to_use };
     TensorShape shape_b_transposed{ shape_b };
@@ -193,12 +164,12 @@
     shape_b_transposed.set(0, shape_b[1]);
     shape_b_transposed.set(1, shape_b[0]);

-    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1 };
-    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo };

     // Fill reference
-    fill(a, 0);
-    fill(b, 1);
+    fill(a, 0 + finfo.hash);
+    fill(b, 1 + finfo.hash);

     // Transpose reference if required
     /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
@@ -216,16 +187,18 @@
     }

     // Run with variable inputs.
+    const int32_t a_offset = a_qinfo.uniform().offset;
+    const int32_t b_offset = b_qinfo.uniform().offset;
     if(run_twice)
     {
         reference::gemmlowp_matrix_multiply_core((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
-        fill((pretranspose_A) ? a_transposed : a, 3);
-        fill((pretranspose_B) ? b_transposed : b, 4);
+        fill((pretranspose_A) ? a_transposed : a, 3 + finfo.hash);
+        fill((pretranspose_B) ? b_transposed : b, 4 + finfo.hash);
     }

     return reference::gemmlowp_matrix_multiply_core((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
 }
-}
+} // namespace

 template 
 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
@@ -233,20 +206,22 @@ public:
     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
     {
-        _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
-        _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+        const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+        const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }

 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, b_offset);
+        const auto output_qinfo = QuantizationInfo(); // No output stage
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo);
     }

-    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+        return compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }

     TensorType _target{};
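The fused fixture below replaces the hand-picked offsets and output stages of
the old dataset with randomized quantization info chosen so that results
rarely saturate. A rough bound shows what the randomization has to respect
(illustrative arithmetic only, not the exact heuristic inside
suggest_matmul_dst_q_info_and_bias):

    #include <cstdint>
    #include <cstdlib>

    // Worst-case magnitude of a single GEMMLowp S32 accumulator for a K-deep
    // dot product of uint8 data: each term (a + a_off) * (b + b_off) is bounded
    // by (255 + |a_off|) * (255 + |b_off|), and a bias value is added on top.
    int64_t worst_case_accumulator(int k, int32_t a_off, int32_t b_off, int32_t max_abs_bias)
    {
        const int64_t term = (255 + static_cast<int64_t>(std::abs(a_off))) *
                             (255 + static_cast<int64_t>(std::abs(b_off)));
        return static_cast<int64_t>(k) * term + max_abs_bias;
    }

Keeping this value inside S32, and the requantized result inside the output
type's [min, max] interval, is what constrains the scales, offsets and bias
bounds generated below.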
@@ -257,54 +232,138 @@ template
+    static void setup_quantization(DataType data_type, const TensorShape& shape_a, const TensorShape& shape_b, QuantizationInfo& a_qinfo, QuantizationInfo& b_qinfo, QuantizationInfo& output_qinfo, TensorFillInfo& finfo)
+    {
+        // This hash is used by the random generators. There may be hash collisions, but
+        // this is intentional: it is an easy way to make the current random generation
+        // process produce different values for many test configurations, which
+        // previously all used the same set of values.
+        finfo.hash = shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1];
+
+        const int32_t t_max = static_cast<int32_t>(std::numeric_limits<TI>::max());
+        const int32_t t_min = static_cast<int32_t>(std::numeric_limits<TI>::min());
+
+        std::mt19937                           generator(library->seed() + finfo.hash);
+        std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+        std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+        const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+        const int32_t offset_lhs = distribution_t(generator);
+        const int32_t offset_rhs = distribution_t(generator);
+
+        a_qinfo = QuantizationInfo(scale_lhs, offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, offset_rhs);
+
+        // reinterpret_input_as_3d or reinterpret_output_as_3d can be ignored, as the underlying gemm / matmul computation
+        // is equivalent to a standard 2D one with m-n-k dimensions
+        const int m = shape_a.y();
+        const int n = shape_b.x();
+        const int k = shape_a.x();
+
+        const float bias_fraction = 0.5f; // is_fused is enabled in compute_gemmlowp_target below, so the bias is included
+
+        QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(a_qinfo, b_qinfo, m, n, k, data_type, bias_fraction);
+        output_qinfo   = q_hint.q_info;
+        finfo.min_bias = q_hint.bias_min;
+        finfo.max_bias = q_hint.bias_max;
+
+        // Both target and reference implementations use negated offsets, i.e.
+        //      float_val = (int_val + offset) * scale
+        // instead of the usual
+        //      float_val = (int_val - offset) * scale
+        // Therefore, after calculating the output quantization above, we negate
+        // the inputs' offsets.
+        a_qinfo = QuantizationInfo(scale_lhs, -offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, -offset_rhs);
+    }
+
+    /** Initialize output stage info from quantization info */
+    static Status init_gemmlowp_output_stage_info(
+        DataType                 data_type,
+        const QuantizationInfo&  a_qinfo,
+        const QuantizationInfo&  b_qinfo,
+        const QuantizationInfo&  output_qinfo,
+        GEMMLowpOutputStageType  type,
+        GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(!is_data_type_quantized_asymmetric(data_type));
+
+        const UniformQuantizationInfo aq_unif = a_qinfo.uniform();
+        const UniformQuantizationInfo bq_unif = b_qinfo.uniform();
+        const UniformQuantizationInfo oq_unif = output_qinfo.uniform();
+
+        float   multiplier = (aq_unif.scale * bq_unif.scale) / oq_unif.scale;
+        int32_t int_multiplier;
+        int32_t shift;
+
+        ARM_COMPUTE_RETURN_ON_ERROR(
+            quantization::calculate_quantized_multiplier(multiplier, &int_multiplier, &shift));
+
+        int32_t type_min = 0;
+        int32_t type_max = 0;
+        std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(output_qinfo, ActivationLayerInfo(), data_type);
+
+        gemmlowp_output_stage_info.gemmlowp_real_multiplier = multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multiplier      = int_multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multipliers     = { int_multiplier };
+        gemmlowp_output_stage_info.gemmlowp_shift           = shift;
+        gemmlowp_output_stage_info.gemmlowp_shifts          = { shift };
+        gemmlowp_output_stage_info.gemmlowp_offset          = oq_unif.offset;
+        gemmlowp_output_stage_info.type                     = type;
+        gemmlowp_output_stage_info.gemmlowp_min_bound       = type_min;
+        gemmlowp_output_stage_info.gemmlowp_max_bound       = type_max;
+
+        return Status{};
+    }
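Note: the real multiplier computed above, (a_scale * b_scale) / out_scale, is decomposed by quantization::calculate_quantized_multiplier() into a Q0.31 integer multiplier plus a shift, so the requantization can run in integer arithmetic. A rough sketch of that decomposition under simplified rounding; the library handles saturation and corner cases more carefully:

    #include <cmath>
    #include <cstdint>

    void decompose_multiplier_sketch(float multiplier, int32_t &quantized_multiplier, int32_t &shift)
    {
        // Normalize the multiplier into [0.5, 1.0) and record the exponent
        int exponent = 0;
        const float significand = std::frexp(multiplier, &exponent);
        shift = -exponent; // right shift applied after the integer multiply (negative means left shift)

        // Scale the significand to Q0.31
        int64_t q = static_cast<int64_t>(std::llround(significand * (1ll << 31)));
        if(q == (1ll << 31)) // handle rounding up to exactly 1.0
        {
            q /= 2;
            --shift;
        }
        quantized_multiplier = static_cast<int32_t>(q);
    }

With this decomposition, multiplier ~= quantized_multiplier * 2^(-31 - shift), which is what the gemmlowp_multiplier / gemmlowp_shift pair stored above encodes.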
+
+    /** Currently this fixture only tests the following data type configurations:
+     *
+     *  1. a and b are of the same data type
+     *  2. The data type is quantized asymmetric
+     *
+     */
+
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b, bool reshape_b_only_on_first_run)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run)
     {
-        ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
-        DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
+        ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE);
+        ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type));
 
-        if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-        {
-            output_stage.is_quantized_per_channel = true;
-            const size_t num_channels = shape_b[0];
-            std::vector<float> scales(num_channels);
-            std::uniform_real_distribution<float> distribution(0.f, 1.f);
-            library->fill(scales, distribution, 0);
-            output_stage.gemmlowp_multipliers.resize(num_channels);
-            output_stage.gemmlowp_shifts.resize(num_channels);
-            for(size_t i = 0; i < num_channels; ++i)
-            {
-                quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
-            }
+        // Randomized dynamic quantization: randomize quantization info in a way that ensures no result saturation
+        // most of the time
+        QuantizationInfo a_qinfo;
+        QuantizationInfo b_qinfo;
+        QuantizationInfo output_qinfo;
+        TensorFillInfo   finfo;
+        setup_quantization(data_type, shape_a, shape_b, a_qinfo, b_qinfo, output_qinfo, finfo);
 
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales), reshape_b_only_on_first_run);
-        }
-        else
-        {
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo(), reshape_b_only_on_first_run);
-        }
+        GEMMLowpOutputStageInfo output_stage;
+        init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage);
+
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
-                              DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo,
+                              DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset,
-                                       b_offset,
-                                       output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run);
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo,
                                        b_qinfo, output_qinfo,
+                                       data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo);
     }
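Note: with the output stage info built by init_gemmlowp_output_stage_info() above, the fused QUANTIZE_DOWN_FIXEDPOINT stage requantizes each s32 accumulator (plus bias) down to the 8-bit output domain. A simplified sketch of that arithmetic using those fields; rounding behaviour is simplified relative to the library kernels:

    #include <algorithm>
    #include <cstdint>

    uint8_t quantize_down_fixedpoint_sketch(int32_t acc, int32_t bias,
                                            int32_t gemmlowp_multiplier, int32_t gemmlowp_shift,
                                            int32_t gemmlowp_offset, int32_t min_bound, int32_t max_bound)
    {
        int64_t v = static_cast<int64_t>(acc) + bias;
        // Fixed-point multiply by multiplier * 2^-31, then the extra right shift
        v = (v * gemmlowp_multiplier) >> (31 + gemmlowp_shift);
        v += gemmlowp_offset;
        v = std::max<int64_t>(min_bound, std::min<int64_t>(max_bound, v)); // clamp to [min_bound, max_bound]
        return static_cast<uint8_t>(v);
    }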
 
-    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                       GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
+    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                       DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        SimpleTensor<int32_t> output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b,
-                                                                  b_qinfo);
+        SimpleTensor<int32_t> output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo);
 
         TensorShape           bias_shape(shape_b[0]);
         SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
-        (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target
+        (run_twice) ? fill_bias_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_bias_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with the same seed as the last run of gemmlowp_target
 
         switch(output_stage.type)
         {
@@ -330,10 +389,10 @@ class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type)
     {
         GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture::setup(shape_a, shape_b,
-            shape_output, a_offset, b_offset, output_stage, data_type_b, false);
+            shape_output, output_stage_type, data_type, false /* reshape_b_only_on_first_run */);
     }
 };
 
@@ -2076,4 +2135,4 @@ protected:
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
-- 
cgit v1.2.1