aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2020-08-03 15:39:45 +0100
committerSiCong Li <sicong.li@arm.com>2020-08-06 15:38:22 +0000
commitb972ae62dd877eb53e6ad56ee124cfbc89441e2d (patch)
treefaaf38a072dd4c1c277567772f3106ad39dd9f3a
parent187558fdacc457887d21b61f50425cd55187722a (diff)
downloadComputeLibrary-b972ae62dd877eb53e6ad56ee124cfbc89441e2d.tar.gz
COMPMID-3652 Fix CLFullyConnectedLayer failure on S10
* Fix out-of-bound mem reads in cases where M < M0 in CLGEMMMatrixMultiplyNativeKernel and CLGEMMMatrixMultiplyReshapedOnlyRHSKernel, as a result of the new boundary-aware reading logics. * Add fixture tests (alongside the padding configuration tests) in these 2 kernels to catch all 5 possible scenarios with block dimension configurations, which includes this particular bug as the "...BoundaryHandlingFullInXSinglePartialInY" test case Change-Id: I8a10ab67594171e3ea4fb6e35c84ddc4ab964fba Signed-off-by: SiCong Li <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3650 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp6
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyNative.cpp62
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp66
4 files changed, 138 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index c67d3601ad..da57aa447f 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -261,6 +261,10 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
+ // Shrink M0 to be always <= M (internal_m) to prevent out-of-bounds reads.
+ // NOTE: This might have implications on heuristics and performance
+ const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
// Create build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -277,7 +281,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile
build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 27520c6072..e65726b234 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -263,6 +263,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
+ // Shrink M0 to be always <= M (internal_m) to prevent out-of-bounds reads.
+ // NOTE: This might have implications on heuristics and performance
+ const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
// Create build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -282,7 +286,7 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext
build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
build_opts.add_option("-DH0=" + support::cpp11::to_string(rhs_info.h0));
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index cdcc1a48f0..6ba5012d15 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -118,6 +118,28 @@ const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });
/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while the different test cases need to vary M and N.
+ *
+ * Eg. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in Y dimension and
+ * parital blocks in X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ // Large k to force potential out-of-bound reads on input0
+ framework::dataset::make("K", 315),
+ // Batch size == 1 to force potential out-of-bound reads on input0
+ framework::dataset::make("batch_size", 1)),
+ framework::dataset::make("M0", 4)),
+ framework::dataset::make("N0", 4)),
+ framework::dataset::make("K0", 4)),
+ // Only need to test F32 as F16 shares identical boundary handling logics
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("alpha", -0.75f )),
+ framework::dataset::make("beta", -0.35f )),
+ broadcast_bias_values),
+ framework::dataset::make("Activation", ActivationLayerInfo()));
+
/** Configuration test */
void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, bool broadcast_bias, DataType data_type, const ActivationLayerInfo &act_info)
{
@@ -257,6 +279,46 @@ m_value, n_value, m0_value, n0_value)
ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
}
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 3),
+ framework::dataset::make("N", 1)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 51)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 37),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
m_values,
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 6a1d495576..bd0cd03ca7 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -131,6 +131,32 @@ const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true, fals
/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while the different test cases need to vary M and N.
+ *
+ * Eg. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in Y dimension and
+ * parital blocks in X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ // Large k to force potential out-of-bound reads on input0
+ framework::dataset::make("K", 315),
+ // Batch size == 1 to force potential out-of-bound reads on input0
+ framework::dataset::make("batch_size", 1)),
+ framework::dataset::make("M0", 4)),
+ framework::dataset::make("N0", 4)),
+ framework::dataset::make("K0", 4)),
+ framework::dataset::make("H0", 3)),
+ i_values_rhs),
+ t_values_rhs),
+ framework::dataset::make("export_to_cl_image_rhs", {true, false})),
+ // Only need to test F32 as F16 shares identical boundary handling logics
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("alpha", -0.75f )),
+ framework::dataset::make("beta", -0.35f )),
+ broadcast_bias_values),
+ framework::dataset::make("Activation", ActivationLayerInfo()));
+
/** Configuration test */
bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value,
unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value,
@@ -330,6 +356,46 @@ m_value, n_value, m0_value, n0_value, export_to_cl_image)
TEST_SUITE(Float)
TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 3),
+ framework::dataset::make("N", 1)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 43)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 37),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
m_values,