diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2024-01-17 16:07:03 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2024-02-01 16:00:34 +0000 |
commit | 2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c (patch) | |
tree | a4e2d5ce46443a79a0778e4960462ce3edf106ec /tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp | |
parent | 7ab7fca87cca8775f82b0e9efec6a40975910c17 (diff) | |
download | ComputeLibrary-2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c.tar.gz |
Use the stable CKW API in the GPU dynamic fusion backend
- Refactor all kernels to work with the CKW stable API
- Add support for sub-tiles in the op_load/op_store CKW operators
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions
- Add support for floor in the op_unary() of CKW
Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp')
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp | 169 |
1 file changed, 70 insertions, 99 deletions
diff --git a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp index d714a2f70c..96b79679c3 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp @@ -25,6 +25,7 @@ #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/MatMulDataset.h" #include "tests/datasets/SmallMatMulDataset.h" #include "tests/framework/datasets/Datasets.h" #include "tests/framework/Fixture.h" @@ -54,27 +55,36 @@ RelativeTolerance<half_float::half> tolerance_f16(half( 0.02)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ } // namespace -/** M0 values to test --precommit*/ -const auto m0_values_precommit = framework::dataset::make("M0", {1, 3}); +/** M0 values to test - precommit */ +const auto m0_values_lhs_nt_precommit = framework::dataset::make("M0", {1, 2, 3}); -/** N0 values to test --precommit*/ -const auto n0_values_precommit = framework::dataset::make("N0", {1, 2, 4}); +/** N0 values to test - precommit */ +const auto n0_values_rhs_t_precommit = framework::dataset::make("N0", {1, 2, 4}); -/** K0 values to test --precommit*/ -const auto k0_values_precommit = framework::dataset::make("K0", {1, 2, 3}); +/** K0 values to test - precommit */ +const auto k0_values_rhs_t_precommit = framework::dataset::make("K0", {1, 2, 4}); -/** M0 values to test --nightly*/ -const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", {1, 2, 3, 4, 5, 6, 7, 8}); -const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", {1, 2, 3, 4, 8}); +/** M0 values to test - nightly */ +const auto m0_values_lhs_nt_nightly = framework::dataset::make("M0", {1, 2, 3, 4}); -/** N0 values to test --nightly*/ -const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", {1, 2, 3, 4, 8, 16}); -const auto n0_values_nightly_rhs_t = 
framework::dataset::make("N0", {1, 2, 3, 4, 8}); +/** N0 values to test - nightly */ +const auto n0_values_rhs_t_nightly = framework::dataset::make("N0", {1, 2, 3, 4, 8}); -/** K0 values to test --nightly*/ -const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", {1, 2, 3, 4, 8, 16}); -const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", {1, 2, 3, 4, 8}); -const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", {1, 2, 3, 4, 5, 6, 7, 8}); +/** K0 values to test - nightly */ +const auto k0_values_rhs_t_nightly = framework::dataset::make("K0", {1, 2, 3, 4, 8}); + +class DFMatMulDataset final : public datasets::MatMulDataset +{ +public: + DFMatMulDataset() + { + // LHS = [K, M], RHS = [N, K], DST = [N, M] + add_config(TensorShape(1U, 1U), TensorShape(1U, 1U), TensorShape(1U, 1U)); + add_config(TensorShape(1U, 2U), TensorShape(2U, 1U), TensorShape(2U, 2U)); + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(32U, 37U), TensorShape(17U, 32U), TensorShape(17U, 37U)); + } +}; TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) @@ -247,70 +257,33 @@ using DynamicFusionGpuMatmulFixture = DynamicFusionGpuMatMulValidationFixture<CL TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE( - RunTiny, - DynamicFusionGpuMatmulFixture<float>, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE( - RunSmall, - DynamicFusionGpuMatmulFixture<float>, - framework::DatasetMode::ALL, - 
combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE( - RunLargeRhsTransposed, - DynamicFusionGpuMatmulFixture<float>, - framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_rhs_t), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture<float>, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); } -// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 -FIXTURE_DATA_TEST_CASE( - RunHighDimensional, - DynamicFusionGpuMatmulFixture<float>, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", 
{true})), - framework::dataset::make("M0", {2})), - framework::dataset::make("N0", {2})), - framework::dataset::make("K0", {2})), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); @@ -319,35 +292,33 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE( - RunSmall, - DynamicFusionGpuMatmulFixture<half>, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture<half>, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); } -FIXTURE_DATA_TEST_CASE( - RunLargeRhsTransposed, - DynamicFusionGpuMatmulFixture<half>, - 
framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_rhs_t), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); |