From 9ae06d4986bc3055f7786c1097b465bd321cf8eb Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 22 Oct 2020 16:37:12 +0100 Subject: COMPMID-3925: Dispatch CLGEMM with no padding y requirement - Add has_pad_y flag in GEMMKernelInfo - Skip reinterpret as 3D in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel if has_pad_y = false - Add test to validate CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false/true - Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel to run with has_pad_y = false/true in CLGEMM - Check if the lhs/dst tensors have pad y. If not, run CLGEMMMatrixMultiplyReshapedOnlyRHSKernel without padding requirement Change-Id: I68bb43389789736d676b899ac7c77fd9138babaf Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4248 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp') diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp index 98149ce149..5629a80f8e 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp @@ -340,6 +340,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -354,6 +355,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**<
Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -371,6 +373,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, false /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -386,6 +389,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, false /**< Flag used to broadcast the bias addition */, true /**< wider accumm */, + true /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -400,6 +404,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, false /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -573,6 +578,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, 
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -586,6 +592,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -599,6 +606,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -613,6 +621,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -626,6 +635,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< 
Multiplication factor for the height of the 4x4 interleaved block */, @@ -939,6 +949,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -952,6 +963,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -965,6 +977,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -979,6 +992,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, @@ -992,6 +1006,7 @@ DATA_TEST_CASE(Validate, 
framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi false /**< reinterpret the input as 3D */, true /**< Flag used to broadcast the bias addition */, false /**< wider accumm */, + false /**< has pad y */, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1 /**< Multiplication factor for the width of the 1xW transposed block */, 1 /**< Multiplication factor for the height of the 4x4 interleaved block */, -- cgit v1.2.1