From b972ae62dd877eb53e6ad56ee124cfbc89441e2d Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Mon, 3 Aug 2020 15:39:45 +0100
Subject: COMPMID-3652 Fix CLFullyConnectedLayer failure on S10

* Fix out-of-bounds memory reads in cases where M < M0 in
  CLGEMMMatrixMultiplyNativeKernel and
  CLGEMMMatrixMultiplyReshapedOnlyRHSKernel, caused by the new
  boundary-aware reading logic.

* Add fixture tests (alongside the padding configuration tests) for
  these 2 kernels to catch all 5 possible block dimension configuration
  scenarios, which include this particular bug as the
  "...BoundaryHandlingFullInXSinglePartialInY" test case.

Change-Id: I8a10ab67594171e3ea4fb6e35c84ddc4ab964fba
Signed-off-by: SiCong Li
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3650
Tested-by: Arm Jenkins
Reviewed-by: Gian Marco Iodice
Comments-Addressed: Arm Jenkins
---
 .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp   |  6 +-
 .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp  |  6 +-
 tests/validation/CL/GEMMMatrixMultiplyNative.cpp   | 62 ++++++++++++++++++++
 .../CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp       | 66 ++++++++++++++++++++++
 4 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index c67d3601ad..da57aa447f 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -261,6 +261,10 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
     const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
     const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
 
+    // Shrink M0 so that it is always <= M (internal_m) to prevent out-of-bounds reads.
+    // NOTE: This might have implications for heuristics and performance
+    const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
     // Create build options
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -277,7 +281,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
     build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
     build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
     build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+    build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
     build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
     build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
     build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 27520c6072..e65726b234 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -263,6 +263,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext
     const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
     const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
 
+    // Shrink M0 so that it is always <= M (internal_m) to prevent out-of-bounds reads.
+    // NOTE: This might have implications for heuristics and performance
+    const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
     // Create build options
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -282,7 +286,7 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext
     build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
     build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
     build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+    build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
     build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
     build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
     build_opts.add_option("-DH0=" + support::cpp11::to_string(rhs_info.h0));
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index cdcc1a48f0..6ba5012d15 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -118,6 +118,28 @@ const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });
 /** Broadcast bias from vector to matrix */
 const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
 
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while M and N are varied across the test cases.
+ *
+ * E.g. M = 64 and N = 33 result in block dimensions that have no partial blocks (all full blocks) in the Y dimension and
+ * partial blocks in the X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                         // Large K to force potential out-of-bounds reads on input0
+                                         framework::dataset::make("K", 315),
+                                         // Batch size == 1 to force potential out-of-bounds reads on input0
+                                         framework::dataset::make("batch_size", 1)),
+                                     framework::dataset::make("M0", 4)),
+                                     framework::dataset::make("N0", 4)),
+                                     framework::dataset::make("K0", 4)),
+                                     // Only need to test F32 as F16 shares identical boundary handling logic
+                                     framework::dataset::make("DataType", DataType::F32)),
+                                     framework::dataset::make("alpha", -0.75f )),
+                                     framework::dataset::make("beta", -0.35f )),
+                                     broadcast_bias_values),
+                                     framework::dataset::make("Activation", ActivationLayerInfo()));
+
 /** Configuration test */
 void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, bool broadcast_bias, DataType data_type, const ActivationLayerInfo &act_info)
 {
@@ -257,6 +279,46 @@ m_value, n_value, m0_value, n0_value)
     ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
 }
 
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(
+                    framework::dataset::make("M", 3),
+                    framework::dataset::make("N", 1)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(
+                    framework::dataset::make("M", 64),
+                    framework::dataset::make("N", 51)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(
+                    framework::dataset::make("M", 64),
+                    framework::dataset::make("N", 32)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(
+                    framework::dataset::make("M", 37),
+                    framework::dataset::make("N", 32)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                 combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                     m_values,
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 6a1d495576..bd0cd03ca7 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -131,6 +131,32 @@ const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true, fals
 /** Broadcast bias from vector to matrix */
 const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
 
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while M and N are varied across the test cases.
+ *
+ * E.g. M = 64 and N = 33 result in block dimensions that have no partial blocks (all full blocks) in the Y dimension and
+ * partial blocks in the X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                         // Large K to force potential out-of-bounds reads on input0
+                                         framework::dataset::make("K", 315),
+                                         // Batch size == 1 to force potential out-of-bounds reads on input0
+                                         framework::dataset::make("batch_size", 1)),
+                                     framework::dataset::make("M0", 4)),
+                                     framework::dataset::make("N0", 4)),
+                                     framework::dataset::make("K0", 4)),
+                                     framework::dataset::make("H0", 3)),
+                                     i_values_rhs),
+                                     t_values_rhs),
+                                     framework::dataset::make("export_to_cl_image_rhs", {true, false})),
+                                     // Only need to test F32 as F16 shares identical boundary handling logic
+                                     framework::dataset::make("DataType", DataType::F32)),
+                                     framework::dataset::make("alpha", -0.75f )),
+                                     framework::dataset::make("beta", -0.35f )),
+                                     broadcast_bias_values),
+                                     framework::dataset::make("Activation", ActivationLayerInfo()));
+
 /** Configuration test */
 bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value,
                             unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value,
@@ -330,6 +356,46 @@ m_value, n_value, m0_value, n0_value, export_to_cl_image)
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+                combine(combine(
+                    framework::dataset::make("M", 3),
+                    framework::dataset::make("N", 1)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+                combine(combine(
+                    framework::dataset::make("M", 64),
+                    framework::dataset::make("N", 43)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+                combine(combine(
+                    framework::dataset::make("M", 64),
+                    framework::dataset::make("N", 32)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+                combine(combine(
+                    framework::dataset::make("M", 37),
+                    framework::dataset::make("N", 32)),
+                    boundary_handling_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
 FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
                 combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                     m_values,
-- 
cgit v1.2.1
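
For readers who want to see the effect of the fix in isolation, the standalone C++ sketch below mirrors the arithmetic introduced above: the internal_m0 clamp and the PARTIAL_STORE_M0 value passed as kernel build options. It is an illustration only, not part of the patch; the names BlockParams and compute_block_params are hypothetical.

// Standalone sketch (not part of the patch). BlockParams and compute_block_params
// are hypothetical names; only the clamping and the partial-block arithmetic
// mirror the change in the two kernels above.
#include <algorithm>
#include <cstdio>

struct BlockParams
{
    unsigned int m0;               // effective block height, passed as -DM0
    unsigned int partial_store_m0; // leftover rows, passed as -DPARTIAL_STORE_M0
};

BlockParams compute_block_params(unsigned int m, unsigned int requested_m0)
{
    BlockParams p{};
    // Mirrors: const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
    // A block must never span more rows than the matrix has, otherwise the kernel
    // would read rows [m, requested_m0) that do not exist.
    p.m0 = std::min(m, requested_m0);
    // Mirrors: const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
    p.partial_store_m0 = m % requested_m0;
    return p;
}

int main()
{
    // The failing FullyConnected case on S10: M = 3 rows with a requested M0 = 4.
    // Before the fix, passing M0 = 4 > M = 3 allowed reads past the end of input0.
    const BlockParams fixed = compute_block_params(3U, 4U);
    std::printf("M=3,  requested M0=4 -> M0=%u, PARTIAL_STORE_M0=%u\n", fixed.m0, fixed.partial_store_m0);

    // A regular case: M = 64 is a multiple of M0 = 4, so the clamp changes nothing.
    const BlockParams regular = compute_block_params(64U, 4U);
    std::printf("M=64, requested M0=4 -> M0=%u, PARTIAL_STORE_M0=%u\n", regular.m0, regular.partial_store_m0);
    return 0;
}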