diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/datasets/LargeMatMulDataset.h (renamed from tests/datasets/LargeBatchMatMulDataset.h) | 16 | ||||
-rw-r--r-- | tests/datasets/MatMulDataset.h (renamed from tests/datasets/BatchMatMulDataset.h) | 14 | ||||
-rw-r--r-- | tests/datasets/SmallMatMulDataset.h (renamed from tests/datasets/SmallBatchMatMulDataset.h) | 23 | ||||
-rw-r--r-- | tests/validation/CL/BatchMatMul.cpp | 239 | ||||
-rw-r--r-- | tests/validation/CL/MatMulKernel.cpp | 391 | ||||
-rw-r--r-- | tests/validation/fixtures/MatMulKernelFixture.h (renamed from tests/validation/fixtures/BatchMatMulFixture.h) | 16 |
6 files changed, 431 insertions, 268 deletions
diff --git a/tests/datasets/LargeBatchMatMulDataset.h b/tests/datasets/LargeMatMulDataset.h index 0d8ff913cf..cbc97d5e4a 100644 --- a/tests/datasets/LargeBatchMatMulDataset.h +++ b/tests/datasets/LargeMatMulDataset.h @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_TESTS_DATASETS_LARGEBATCHMATMULDATASET -#define ACL_TESTS_DATASETS_LARGEBATCHMATMULDATASET +#ifndef ACL_TESTS_DATASETS_LARGEMATMULDATASET +#define ACL_TESTS_DATASETS_LARGEMATMULDATASET #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "tests/datasets/BatchMatMulDataset.h" +#include "tests/datasets/MatMulDataset.h" namespace arm_compute { @@ -34,10 +34,10 @@ namespace test { namespace datasets { -class LargeBatchMatMulDataset final : public BatchMatMulDataset +class LargeMatMulDataset final : public MatMulDataset { public: - LargeBatchMatMulDataset() + LargeMatMulDataset() { add_config(TensorShape(21U, 13U, 3U, 2U), TensorShape(33U, 21U, 3U, 2U), TensorShape(33U, 13U, 3U, 2U)); add_config(TensorShape(38U, 12U, 1U, 5U), TensorShape(21U, 38U, 1U, 5U), TensorShape(21U, 12U, 1U, 5U)); @@ -45,10 +45,10 @@ public: } }; -class HighDimensionalBatchMatMulDataset final : public BatchMatMulDataset +class HighDimensionalMatMulDataset final : public MatMulDataset { public: - HighDimensionalBatchMatMulDataset() + HighDimensionalMatMulDataset() { add_config(TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor } @@ -57,4 +57,4 @@ public: } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ACL_TESTS_DATASETS_LARGEBATCHMATMULDATASET */ +#endif /* ACL_TESTS_DATASETS_LARGEMATMULDATASET */ diff --git a/tests/datasets/BatchMatMulDataset.h b/tests/datasets/MatMulDataset.h index dad7cc0af4..9c1c5fb05d 100644 --- a/tests/datasets/BatchMatMulDataset.h +++ b/tests/datasets/MatMulDataset.h @@ -21,8 +21,8 @@ * OUT OF 
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef TESTS_DATASETS_BATCHMATMULDATASET -#define TESTS_DATASETS_BATCHMATMULDATASET +#ifndef ACL_TESTS_DATASETS_MATMULDATASET +#define ACL_TESTS_DATASETS_MATMULDATASET #include "arm_compute/core/TensorShape.h" #include "utils/TypePrinter.h" @@ -33,7 +33,7 @@ namespace test { namespace datasets { -class BatchMatMulDataset +class MatMulDataset { public: using type = std::tuple<TensorShape, TensorShape, TensorShape>; @@ -58,7 +58,7 @@ public: return description.str(); } - BatchMatMulDataset::type operator*() const + MatMulDataset::type operator*() const { return std::make_tuple(*_a_it, *_b_it, *_dst_it); } @@ -96,8 +96,8 @@ public: } protected: - BatchMatMulDataset() = default; - BatchMatMulDataset(BatchMatMulDataset &&) = default; + MatMulDataset() = default; + MatMulDataset(MatMulDataset &&) = default; private: std::vector<TensorShape> _a_shapes{}; @@ -107,4 +107,4 @@ private: } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* TESTS_DATASETS_BATCHMATMULDATASET */ +#endif /* ACL_TESTS_DATASETS_MATMULDATASET */ diff --git a/tests/datasets/SmallBatchMatMulDataset.h b/tests/datasets/SmallMatMulDataset.h index cfe76bea6d..ae92b9abf5 100644 --- a/tests/datasets/SmallBatchMatMulDataset.h +++ b/tests/datasets/SmallMatMulDataset.h @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ACL_TESTS_DATASETS_SMALLBATCHMATMULDATASET -#define ACL_TESTS_DATASETS_SMALLBATCHMATMULDATASET +#ifndef ACL_TESTS_DATASETS_SMALLMATMULDATASET +#define ACL_TESTS_DATASETS_SMALLMATMULDATASET #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "tests/datasets/BatchMatMulDataset.h" +#include "tests/datasets/MatMulDataset.h" namespace arm_compute { @@ -34,10 +34,10 @@ namespace test { namespace datasets { -class SmallBatchMatMulDataset final : public BatchMatMulDataset +class SmallMatMulDataset final : public MatMulDataset { public: - SmallBatchMatMulDataset() + SmallMatMulDataset() { add_config(TensorShape(3U, 4U, 2U, 2U), TensorShape(2U, 3U, 2U, 2U), TensorShape(2U, 4U, 2U, 2U)); add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); @@ -46,7 +46,18 @@ public: add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); } }; + +class TinyMatMulDataset final : public MatMulDataset +{ +public: + TinyMatMulDataset() + { + add_config(TensorShape(1U), TensorShape(1U), TensorShape(1U)); + add_config(TensorShape(2U, 2U), TensorShape(2U, 2U), TensorShape(2U, 2U)); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ACL_TESTS_DATASETS_SMALLBATCHMATMULDATASET */ +#endif /* ACL_TESTS_DATASETS_SMALLMATMULDATASET */ diff --git a/tests/validation/CL/BatchMatMul.cpp b/tests/validation/CL/BatchMatMul.cpp deleted file mode 100644 index fd84526000..0000000000 --- a/tests/validation/CL/BatchMatMul.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "src/gpu/cl/kernels/ClNativeMatMulKernel.h" -#include "tests/datasets/LargeBatchMatMulDataset.h" -#include "tests/datasets/SmallBatchMatMulDataset.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/BatchMatMulFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ -constexpr float abs_tolerance_f32( - 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */ -constexpr float abs_tolerance_f16( - 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */ -RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ -} // namespace - -/** M0 values to test --precommit*/ -const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); - -/** N0 values to test --precommit*/ -const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); - -/** K0 values to test --precommit*/ -const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 }); - -/** M0 values to test --nightly*/ -const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 }); -// const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 }); // To be enabled - -/** N0 values to test --nightly*/ -const auto n0_values_nightly_rhs_nt = 
framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 }); -const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 }); - -/** K0 values to test --nightly*/ -const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 }); -const auto k0_values_nightly_lhs_nt_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 }); -// const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 }); // To be enabled - -template <typename T> -using CLBatchMatMulFixture = BatchMatMulValidationFixture<T>; - -TEST_SUITE(CL) -TEST_SUITE(BatchMatMul) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("LhsInfo", -{ - TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), -}), -framework::dataset::make("RhsInfo", -{ - TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), -})), -framework::dataset::make("OutputInfo", -{ - TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), 
TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), -})), -framework::dataset::make("MatMulInfo", -{ - MatMulKernelInfo(false, false, 2, 2, 2, false), MatMulKernelInfo(false, false, 2, 2, 2, false), MatMulKernelInfo(false, false, 9, 2, 2, false), MatMulKernelInfo(false, false, 0, 2, 2, false), // M0 cannot be < 1 - MatMulKernelInfo(false, true, 4, 5, 2, false), // For LHS NT RHS NT: N0 cannot be 5 - MatMulKernelInfo(false, true, 4, 6, 2, false), // For LHS NT RHS NT: N0 cannot be 6 - MatMulKernelInfo(false, true, 4, 9, 2, false), // For LHS NT RHS NT: N0 cannot be 9 - MatMulKernelInfo(false, true, 4, 10, 2, false), // For LHS NT RHS NT: N0 cannot be 10 - MatMulKernelInfo(false, true, 4, 11, 2, false), // For LHS NT RHS NT: N0 cannot be 11 - MatMulKernelInfo(false, true, 4, 17, 2, false), // For LHS NT RHS NT: N0 cannot be 17 -})), -framework::dataset::make("Expected", { false, true, true, false, false, false, false, false, false, false })), -lhs_info, rhs_info, output_info, matmul_info, expected) -{ - bool is_valid = bool(ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_info)); - ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); -} -TEST_SUITE(Float) -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmallNoTranspose, CLBatchMatMulFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { false })), - m0_values_precommit), - n0_values_precommit), 
- k0_values_precommit), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLBatchMatMulFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { true })), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLBatchMatMulFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { false })), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_nt), - k0_values_nightly_lhs_nt_rhs_nt), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 -// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels -FIXTURE_DATA_TEST_CASE(RunHighDimNoTranspose, CLBatchMatMulFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::HighDimensionalBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { false })), - framework::dataset::make("M0", { 2 })), - framework::dataset::make("N0", { 2 })), - framework::dataset::make("K0", { 2 })), - 
framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLBatchMatMulFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { true })), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_lhs_nt_rhs_t), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunHighDimRhsTransposed, CLBatchMatMulFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::HighDimensionalBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { true })), - framework::dataset::make("M0", { 2 })), - framework::dataset::make("N0", { 2 })), - framework::dataset::make("K0", { 2 })), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} -TEST_SUITE_END() // FP32 - -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmallNoTranspose, CLBatchMatMulFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { false })), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("DataType", DataType::F16))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); -} -FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLBatchMatMulFixture<half>, 
framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { true })), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("DataType", DataType::F16))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); -} -FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLBatchMatMulFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { false })), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_nt), - k0_values_nightly_lhs_nt_rhs_nt), - framework::dataset::make("DataType", DataType::F16))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); -} -FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLBatchMatMulFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeBatchMatMulDataset(), - framework::dataset::make("pretransose_A", { false })), - framework::dataset::make("pretransose_B", { true })), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_lhs_nt_rhs_t), - framework::dataset::make("DataType", DataType::F16))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); -} -TEST_SUITE_END() // FP16 - -TEST_SUITE_END() // Float -TEST_SUITE_END() // BatchMatMul -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CL/MatMulKernel.cpp b/tests/validation/CL/MatMulKernel.cpp new file mode 100644 index 0000000000..5d2e59ab4c --- /dev/null +++ b/tests/validation/CL/MatMulKernel.cpp @@ -0,0 +1,391 @@ 
+/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "src/gpu/cl/kernels/ClNativeMatMulKernel.h" +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulKernelFixture.h" +#include "tests/validation/reference/Permute.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */ +RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +} // namespace + +/** M0 values to test --precommit*/ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); + +/** N0 values to test --precommit*/ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); + +/** K0 values to test --precommit*/ +const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 }); + +/** M0 values to test --nightly*/ +const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 }); +const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 }); + +/** N0 values to test --nightly*/ +const auto 
n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 }); +const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 }); + +/** K0 values to test --nightly*/ +const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 }); +const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 }); +const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 }); + +template <typename T> +using CLMatMulKernelFixture = MatMulKernelValidationFixture<T>; + +TEST_SUITE(CL) +TEST_SUITE(MatMulKernel) +TEST_SUITE(Validate) + +TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL) +{ + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = + { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + // Lhs not-transposed, Rhs-not-transposed + { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 7), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 9, 1, 2), true }, + { MatMulKernelInfo(false, false, 3, 16, 3), true }, + { MatMulKernelInfo(false, false, 7, 3, 4), true }, + + // Lhs not-transposed, Rhs transposed + { MatMulKernelInfo(false, true, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 3, 12), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 3, 6), 
false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 5, 1, 2), true }, + { MatMulKernelInfo(false, true, 3, 3, 3), true }, + { MatMulKernelInfo(false, true, 2, 4, 8), true }, + + // Lhs transposed, Rhs-not-transposed + { MatMulKernelInfo(true, false, 1, 1, 0), false }, // K0 should be > 0 + { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 4, 1, 22), true }, + { MatMulKernelInfo(true, false, 3, 3, 3), true }, + { MatMulKernelInfo(true, false, 2, 4, 8), true }, + + // Lhs transposed, Rhs-transposed + { MatMulKernelInfo(true, true, 2, 1, 5), false }, // K0 should be in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 1, 8, 7), false }, // K0 should be in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 4, 8, 16), true }, + { MatMulKernelInfo(true, true, 3, 3, 4), true }, + { MatMulKernelInfo(true, true, 16, 4, 8), true }, + }; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. 
+ const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + + for(auto &pair : supported_block_sizes) + { + TensorInfo output_info; + Status status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, pair.first); + + ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS); + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + { TensorShape(5U, 1U), TensorShape(3U, 5U), true }, + { TensorShape(10U, 12U), TensorShape(3U, 10U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 8U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 5U), false }, // Mismatch in the K dimension + { TensorShape(5U, 0U), TensorShape(2U, 5U), false }, // Invalid dimension + { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), true }, + { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false }, // no batch broadcasting + { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false }, // mismatch in batch dimension + }; + + for(auto &tuple : shape_configurations) + { + const bool expected = std::get<2>(tuple); + + for(bool adj_lhs : + { + false, true + }) + { + for(bool adj_rhs : + { + false, true + }) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + + if(adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if(adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32); + TensorInfo 
output_info; + + MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ }; + + Status status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + // Only the non-transposed (Nt/Nt) configuration is exercised here; the test varies the data types, not the transposition + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = + { + { DataType::F32, DataType::F32, DataType::F32, true }, + { DataType::F16, DataType::F16, DataType::F16, true }, + { DataType::F16, DataType::F32, DataType::F32, false }, // no mixed precision + { DataType::F64, DataType::F64, DataType::F64, false }, // no double precision + { DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false }, // no quantized types + { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false }, // no quantized types + { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types + { DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false }, // no quantized types + { DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false }, // no quantized types + { DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false }, // no quantized types + { DataType::S64, DataType::S64, DataType::S64, false }, // no integral types + { DataType::S32, DataType::S32, DataType::S32, false }, // no integral types + { DataType::S16, DataType::S16, DataType::S16, false }, // no integral types + { DataType::S8, DataType::S8, DataType::S8, false }, // no integral types + { DataType::U64, DataType::U64, DataType::U64, false }, // no integral types + { DataType::U32, DataType::U32, DataType::U32, false }, // no 
integral types + { DataType::U16, DataType::U16, DataType::U16, false }, // no integral types + { DataType::U8, DataType::U8, DataType::U8, false }, // no integral types + }; + + const TensorShape shape = TensorShape(10U, 10U); + const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false }; + for(auto &tuple : data_type_configurations) + { + const bool expected = std::get<3>(tuple); + + const TensorInfo lhs_info(shape, 1, std::get<0>(tuple)); + const TensorInfo rhs_info(shape, 1, std::get<1>(tuple)); + TensorInfo output_info(shape, 1, std::get<2>(tuple)); + + Status status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_SUITE_END() // Validate + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<float>, 
framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false })), + framework::dataset::make("pretransose_B", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false })), + framework::dataset::make("pretransose_B", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { true })), + framework::dataset::make("pretransose_B", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { true })), + framework::dataset::make("pretransose_B", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + 
k0_values_nightly_rhs_t), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 +// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels +FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2 })), + framework::dataset::make("K0", { 2 })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false })), + framework::dataset::make("pretransose_B", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + 
k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false })), + framework::dataset::make("pretransose_B", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { true })), + framework::dataset::make("pretransose_B", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { true })), + framework::dataset::make("pretransose_B", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float +TEST_SUITE_END() // MatMulKernel +TEST_SUITE_END() // CL +} // 
namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/BatchMatMulFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h index 9fb2dcc1b7..459564618f 100644 --- a/tests/validation/fixtures/BatchMatMulFixture.h +++ b/tests/validation/fixtures/MatMulKernelFixture.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_TESTS_VALIDATION_FIXTURES_BATCHMATMULFIXTURE -#define ACL_TESTS_VALIDATION_FIXTURES_BATCHMATMULFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE +#define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE #include "arm_compute/core/KernelDescriptors.h" #include "src/gpu/cl/kernels/ClNativeMatMulKernel.h" @@ -44,7 +44,7 @@ namespace validation using namespace arm_compute::opencl::kernels; template <typename T> -class BatchMatMulValidationFixture : public framework::Fixture +class MatMulKernelValidationFixture : public framework::Fixture { public: template <typename...> @@ -96,7 +96,7 @@ protected: CLTensor b = create_tensor<CLTensor>(shape_b, data_type, 1); CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1); - CLSynthetizeOperator<ClNativeMatMulKernel> batchMatMul{}; + CLSynthetizeOperator<ClNativeMatMulKernel> matMul{}; MatMulKernelInfo matmul_info; matmul_info.adj_lhs = pretranspose_a; matmul_info.adj_rhs = pretranspose_b; @@ -104,7 +104,7 @@ protected: matmul_info.n0 = N0; matmul_info.k0 = K0; - batchMatMul.configure(a.info(), b.info(), dst.info(), matmul_info); + matMul.configure(a.info(), b.info(), dst.info(), matmul_info); ARM_COMPUTE_ASSERT(a.info()->is_resizable()); ARM_COMPUTE_ASSERT(b.info()->is_resizable()); ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); @@ -122,12 +122,12 @@ protected: fill(CLAccessor(a), 0); fill(CLAccessor(b), 1); - // Compute batchMatMul kernel + // Compute matMul kernel ITensorPack tensors_pack({ { ACL_SRC_0, &a }, { ACL_SRC_1, &b }, { ACL_DST, &dst } }); - 
batchMatMul.run(tensors_pack); + matMul.run(tensors_pack); return dst; } @@ -200,4 +200,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ACL_TESTS_VALIDATION_FIXTURES_BATCHMATMULFIXTURE */ +#endif /* ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE */ |