From afa19725f7f3feb2c21a6aed02ade49d08e3097b Mon Sep 17 00:00:00 2001
From: SiCongLi
Date: Sun, 24 Oct 2021 19:12:33 +0100
Subject: Add post ops to ClGemmMatrixMultiplyReshapedOnlyRHSKernel and
 ClGemmMatrixMultiplyNativeKernel Part 3

Partially resolves: COMPMID-4435

Change-Id: Ifc5affa3a24a70942ca2d001380205df09b03ad7
Signed-off-by: SiCongLi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6550
Reviewed-by: Gian Marco Iodice
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 tests/validation/CL/GEMMMatrixMultiplyNative.cpp | 218 +++++++++++++++++++++++
 1 file changed, 218 insertions(+)

diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index dc5fbc36ba..e3f151a2ca 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -53,6 +53,11 @@ using CLGEMMMatrixMultiplyNative = CLSynthetizeOperator<ClGemmMatrixMultiplyNat
 template <typename T>
 using CLGEMMMatrixMultiplyNativeFixture = GEMMMatrixMultiplyNativeValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
 
+// Fixture for CLGEMMMatrixMultiplyNative with post ops
+template <typename T>
+using CLGEMMMatrixMultiplyNativeWithPostOpsFixture =
+    GEMMMatrixMultiplyNativeWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
+
 // Fixture for CLGEMMMatrixMultiplyNative3D
 template <typename T>
 using CLGEMMMatrixMultiplyNative3DFixture = GEMMMatrixMultiplyNative3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
@@ -141,6 +146,80 @@ const auto boundary_handling_cases = combine(combine(combine(combine(combine(com
                                                      broadcast_bias_values),
                                              framework::dataset::make("Activation", ActivationLayerInfo()));
 
+/** Post Ops */
+using PostOpArgBroadcast = CLGEMMMatrixMultiplyNativeWithPostOpsFixture<float>::PostOpArgBroadcast;
+experimental::PostOpList<PostOpArgBroadcast> post_ops_1()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::LINEAR, 0.5F, 0.0F});
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(true, true, false), // If broadcast in dims 0, 1 and 2
+        0,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    return post_ops;
+}
+experimental::PostOpList<PostOpArgBroadcast> post_ops_2()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(false, true, true), // If broadcast in dims 0, 1 and 2
+        1,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    return post_ops;
+}
+experimental::PostOpList<PostOpArgBroadcast> post_ops_3()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    // post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(false, false, false), // If broadcast in dims 0, 1 and 2
+        1,
+        ConvertPolicy::SATURATE);
+    return post_ops;
+}
+
+/** Different Post Op Lists */
+const auto post_op_lists = framework::dataset::make("post_op_lists", {
+    post_ops_1(),
+    post_ops_2(),
+    post_ops_3(),
+} );
+
+bool is_post_op_list_valid(unsigned int m, unsigned int n, unsigned int k, unsigned int batch, DataType data_type, const experimental::PostOpList<ITensorInfo *> &post_ops)
+{
+    const auto lhs_info = GEMMLHSMatrixInfo(4,4,1,false,true);
+    const auto rhs_info = GEMMRHSMatrixInfo(4,4,1,true,true,false);
+
+    // Create TensorInfo for post op arguments
+    TensorInfo input0_info(TensorShape(k, m, batch), 1, data_type);
+    TensorInfo input1_info(TensorShape(n, k, batch), 1, data_type);
+    TensorInfo input2_info(TensorShape(n), 1, data_type);
+    TensorInfo output_info(TensorShape(n, m, batch), 1, data_type);
+
+    GEMMKernelInfo gemm_info(m, n, k, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
+                             false /**< reinterpret the input as 3D */,
+                             true /**< Flag used to broadcast the bias addition */,
+                             false /**< wider accum */,
+                             false /**< has pad y */,
+                             ActivationLayerInfo::ActivationFunction::IDENTITY,
+                             1 /**< Multiplication factor for the width of the 1xW transposed block */,
+                             1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
+                             lhs_info,
+                             rhs_info,
+                             0 /**< Offset to be added to each element of the matrix A */,
+                             0 /**< Offset to be added to each element of the matrix B */,
+                             post_ops);
+    return bool(ClGemmMatrixMultiplyNativeKernel::validate(&input0_info.clone()->set_is_resizable(true),
+                                                           &input1_info.clone()->set_is_resizable(true),
+                                                           &input2_info.clone()->set_is_resizable(true),
+                                                           &output_info.clone()->set_is_resizable(true), 1.f, 1.f,
+                                                           lhs_info,
+                                                           rhs_info,
+                                                           gemm_info));
+}
+
 /** Configuration test */
 void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, bool broadcast_bias, DataType data_type, const ActivationLayerInfo &act_info)
 {
@@ -191,6 +270,119 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
 TEST_SUITE(CL)
 TEST_SUITE(GEMMMatrixMultiplyNative)
 
+TEST_SUITE(ValidateFusedPostOpsConfigs)
+TEST_SUITE(Invalid)
+TEST_CASE(UnsupportedPostOpSequence, framework::DatasetMode::ALL)
+{
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 17;
+    const unsigned int n     = 1;
+    const unsigned int k     = 13;
+    const unsigned int batch = 2;
+    TensorShape post_op_arg0_shape(n, m, batch);
+    TensorInfo post_op_arg_info(post_op_arg0_shape, 1, data_type);
+    auto post_op_arg1_info = post_op_arg_info.clone();
+
+    // Unsupported sequence of post ops
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(
+        &post_op_arg_info,
+        1,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(
+        post_op_arg1_info.get(),
+        0,
+        ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_CASE(OutputWidened, framework::DatasetMode::ALL)
+{
+    // Invalid broadcast: post op tensors "widen" the output tensor
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 1;
+    const unsigned int n     = 18;
+    const unsigned int k     = 13;
+    const unsigned int batch = 2;
+    TensorShape post_op_arg_shape(n, m + 1, batch); // output's Y dimension (m) is "widened", which is not allowed
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(&post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInXDimOnly, framework::DatasetMode::ALL)
+{
+    // Invalid broadcast: post op tensors broadcast in the first dimension (X) only
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 22;
+    const unsigned int n     = 16;
+    const unsigned int k     = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, m, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(&post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // Invalid
+TEST_SUITE(Valid)
+TEST_CASE(EmptyPostOpList, framework::DatasetMode::ALL)
+{
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 22;
+    const unsigned int n     = 16;
+    const unsigned int k     = 15;
+    const unsigned int batch = 3;
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInYDimOnly, framework::DatasetMode::ALL)
+{
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 22;
+    const unsigned int n     = 16;
+    const unsigned int k     = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(n, 1, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(&post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInBothXandYDims, framework::DatasetMode::ALL)
+{
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 22;
+    const unsigned int n     = 16;
+    const unsigned int k     = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, 1, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(&post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInAllDims, framework::DatasetMode::ALL)
+{
+    const auto data_type     = DataType::F32;
+    const unsigned int m     = 22;
+    const unsigned int n     = 16;
+    const unsigned int k     = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, 1, 1);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo *> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(&post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // Valid
+TEST_SUITE_END() // ValidateFusedPostOpsConfigs
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
@@ -323,6 +515,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyNative3DFixture<float>, f
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
+
+TEST_SUITE(FusedPostOps)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeWithPostOpsFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                               k_values),
+                                                           b_values),
+                                                       framework::dataset::make("M0", { 4 })),
+                                                   n0_values_precommit),
+                                               k0_values_precommit),
+                                           framework::dataset::make("DataType", DataType::F32)),
+                                       framework::dataset::make("alpha", { 1.0f })),
+                                   framework::dataset::make("beta", { 1.0f })),
+                               framework::dataset::make("broadcast_bias", { false, true })),
+                           framework::dataset::make("Activation", { ActivationLayerInfo() })),
+                       post_op_lists)
+                       )
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE_END() // FusedPostOps
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // GEMMMatrixMultiplyNative
-- 
cgit v1.2.1
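
A note on how the pieces above fit together: each PostOpList describes a chain of element-wise operations fused onto the tail of the GEMM kernel, and is_post_op_list_valid() is the gate that accepts or rejects a chain for given GEMM dimensions. The sketch below shows roughly how a client would assemble the same kind of chain before configuring the kernel. It is a minimal sketch, not part of the patch: it assumes the experimental types are exposed by the public headers "arm_compute/core/experimental/IPostOp.h" (PostOpList) and "arm_compute/core/experimental/PostOps.h" (PostOpAct, PostOpEltwiseAdd), and the helper name make_act_add_act_list is illustrative only, not an API from this change.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/core/experimental/PostOps.h"

    using namespace arm_compute;

    // Build the activation -> eltwise add -> activation chain used by post_ops_1(),
    // but with a concrete ITensorInfo* argument, as the runtime test cases do.
    experimental::PostOpList<ITensorInfo *> make_act_add_act_list(ITensorInfo *addend)
    {
        experimental::PostOpList<ITensorInfo *> post_ops{};
        post_ops.push_back_op<experimental::PostOpAct<ITensorInfo *>>(
            ActivationLayerInfo{ ActivationLayerInfo::ActivationFunction::LINEAR, 0.5F, 0.0F });
        post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo *>>(
            addend,
            1, // position of the previous op's result among this op's arguments
            ConvertPolicy::SATURATE);
        post_ops.push_back_op<experimental::PostOpAct<ITensorInfo *>>(
            ActivationLayerInfo{ ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F });
        return post_ops;
    }

    int main()
    {
        // The GEMM output is (n, m, batch) = (16, 22, 3) in ACL's (X, Y, Z) layout.
        const unsigned int m = 22, n = 16, batch = 3;

        // Broadcast in Y only, shape (n, 1, batch): accepted by the Valid suite above.
        TensorInfo y_broadcast(TensorShape(n, 1U, batch), 1, DataType::F32);
        // Broadcast in X only, shape (1, m, batch): rejected by the Invalid suite above.
        TensorInfo x_broadcast(TensorShape(1U, m, batch), 1, DataType::F32);

        auto valid_list   = make_act_add_act_list(&y_broadcast);
        auto invalid_list = make_act_add_act_list(&x_broadcast);

        // Either list would then be stored in GEMMKernelInfo::post_ops and screened
        // by a gate like is_post_op_list_valid() before kernel configuration.
        (void)valid_list;
        (void)invalid_list;
        return 0;
    }

Note the asymmetry the test suites encode: a post-op argument may broadcast in Y alone (n, 1, batch), in X and Y together, or in all dimensions, but an X-only broadcast (1, m, batch) and any shape that widens the output are both rejected.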