From b0f342ec315397e4b87d3a9cc3d12f3645c153bc Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 21 May 2019 13:32:43 +0100 Subject: COMPMID-2171: Fuse bias addition with CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Change-Id: I1d1e1f28fe7022309d72900893e8368820ca0f89 Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/c/1259 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- .../CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp | 74 ++++++++++++------ tests/validation/fixtures/GEMMFixture.h | 88 +++++++++++++++++----- 2 files changed, 119 insertions(+), 43 deletions(-) (limited to 'tests') diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp index 83051d2efe..23ae004912 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp @@ -72,6 +72,9 @@ constexpr float tolerance_num_f16 = 0.02f; /** Alpha values to test - Precommit */ const auto a_values = framework::dataset::make("alpha", {1.0f, -0.75f} ); +/** Beta values to test - Precommit */ +const auto beta_values = framework::dataset::make("beta", {-0.75f, 0.0f} ); + /** M values to test */ const auto m_values = framework::dataset::make("M", 37); @@ -120,8 +123,11 @@ const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, fal /** Transpose values to test with RHS matrix */ const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true, false }); +/**Broadcast bias from vector to matrix */ +const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", {false, true} ); + /** Configuration test */ -void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value, bool i_value_rhs, bool t_value_rhs, DataType data_type) +void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value, bool i_value_rhs, bool t_value_rhs, bool broadcast_bias, DataType data_type) { const unsigned int M = m_value; const unsigned int N = n_value; @@ -138,7 +144,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned rhs_info.interleave = i_value_rhs; rhs_info.transpose = t_value_rhs; - GEMMReshapeInfo gemm_info(M, N, K); + GEMMReshapeInfo gemm_info(M, N, K, false, false, 0, false, broadcast_bias); const TensorShape lhs_shape(K, M, b_value); const TensorShape rhs_shape(N, K, b_value); @@ -154,13 +160,22 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned CLTensor rhs_reshaped = create_tensor(rhs_shape_reshaped, data_type); CLTensor dst = create_tensor(dst_shape, data_type); + TensorShape bias_shape = dst_shape; + if (broadcast_bias) + { + bias_shape[1] = 1; + bias_shape[2] = 1; + } + CLTensor bias = create_tensor(bias_shape, data_type); + ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); // Create and configure function CLGEMMMatrixMultiplyReshapedOnlyRHS gemm; - gemm.configure(&lhs, &rhs_reshaped, &dst, 1.0f, lhs_info, rhs_info, gemm_info); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, gemm_info); } } // namespace @@ -168,7 +183,7 @@ TEST_SUITE(CL) TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRHS) TEST_SUITE(Float) TEST_SUITE(FP32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(combine( +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_values, n_values), k_values), @@ -179,13 +194,14 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi h0_values_precommit), i_values_rhs), t_values_rhs), -m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs) + broadcast_bias_values), +m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs, broadcast_bias) { - validate_configuration(m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs, DataType::F32); + validate_configuration(m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs, broadcast_bias, DataType::F32); } FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_values, n_values), k_values), @@ -197,14 +213,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_values, n_values), k_values), @@ -216,14 +234,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_w_values, m_h_values), n_values), @@ -236,14 +256,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture< i_values_rhs), t_values_rhs), framework::dataset::make("DataType", DataType::F32)), - a_values)) + a_values), + b_values)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_w_values, m_h_values), n_values), @@ -256,7 +277,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture< i_values_rhs), t_values_rhs), framework::dataset::make("DataType", DataType::F32)), - a_values)) + a_values), + b_values)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); @@ -265,7 +287,7 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_values, n_values), k_values), @@ -277,14 +299,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_values, n_values), k_values), @@ -296,14 +320,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_w_values, m_h_values), n_values), @@ -316,14 +342,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture< i_values_rhs), t_values_rhs), framework::dataset::make("DataType", DataType::F16)), - a_values)) + a_values), + b_values)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( m_w_values, m_h_values), n_values), @@ -336,7 +363,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture< i_values_rhs), t_values_rhs), framework::dataset::make("DataType", DataType::F16)), - a_values)) + a_values), + b_values)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); @@ -347,4 +375,4 @@ TEST_SUITE_END() // GEMMMatrixMulipltyReshapedOnlyRHS TEST_SUITE_END() // CL } // namespace validation } // namespace test -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index b7976104aa..34f9bd848c 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -390,7 +390,7 @@ class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fix public: template void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, - bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha) + bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha, float beta, bool broadcast_bias) { GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = m0; @@ -407,8 +407,18 @@ public: const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); - _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type, alpha); - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha); + TensorShape bias_shape; + if(broadcast_bias) + { + bias_shape = TensorShape(n, 1, 1); + } + else + { + bias_shape = TensorShape(n, m, batch_size); + } + + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias); + _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, broadcast_bias); } protected: @@ -423,11 +433,13 @@ protected: library->fill_borders_with_garbage(tensor, distribution_inf, i); } - TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha) + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + DataType data_type, float alpha, float beta, bool broadcast_bias) { // Create tensors - TensorType lhs = create_tensor(lhs_shape, data_type, 1); - TensorType rhs = create_tensor(rhs_shape, data_type, 1); + TensorType lhs = create_tensor(lhs_shape, data_type, 1); + TensorType rhs = create_tensor(rhs_shape, data_type, 1); + TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType rhs_reshaped; TensorType dst; @@ -441,7 +453,7 @@ protected: ReshapeRHSFunctionType reshape_rhs; GEMMFunctionType gemm; reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &dst, alpha, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, 0, false, broadcast_bias)); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -450,6 +462,7 @@ protected: lhs.allocator()->allocate(); rhs.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -460,6 +473,7 @@ protected: // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); + fill(AccessorType(bias), 2); // Compute GEMM reshape_rhs.run(); @@ -468,7 +482,7 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha) + SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, bool broadcast_bias) { TensorShape dst_shape = lhs_shape; dst_shape[0] = rhs_shape[0]; @@ -477,13 +491,31 @@ protected: // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; - SimpleTensor c{ dst_shape, data_type, 1 }; + SimpleTensor bias{ dst_shape, data_type, 1 }; + + const int n = rhs_shape[0]; + const int m = lhs_shape[1]; + const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); - return reference::gemm(lhs, rhs, c, alpha, 0.0f); + if(broadcast_bias) + { + SimpleTensor tmp{ bias_shape, data_type, 1 }; + fill(tmp, 2); + for(int i = 0; i < m * batch_size; i++) + { + memcpy(bias.data() + i * n, tmp.data(), n * sizeof(T)); + } + } + else + { + fill(bias, 2); + } + + return (reference::gemm(lhs, rhs, bias, alpha, beta)); } TensorType _target{}; @@ -590,7 +622,7 @@ class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::F public: template void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, - bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha) + bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha, float beta) { GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = m0; @@ -609,9 +641,10 @@ public: // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); + const TensorShape bias_shape(n, 1, 1); - _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type, alpha, m_h); - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, m_h); + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h); + _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, m_h); } protected: @@ -622,12 +655,14 @@ protected: library->fill(tensor, distribution, i); } - TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + DataType data_type, float alpha, float beta, unsigned int m_h) { // Create tensors - TensorType lhs = create_tensor(lhs_shape, data_type, 1); - TensorType rhs = create_tensor(rhs_shape, data_type, 1); + TensorType lhs = create_tensor(lhs_shape, data_type, 1); + TensorType rhs = create_tensor(rhs_shape, data_type, 1); + TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType rhs_reshaped; TensorType dst; @@ -641,7 +676,7 @@ protected: ReshapeRHSFunctionType reshape_rhs; GEMMFunctionType gemm; reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &dst, alpha, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h)); + gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h, false, true)); ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -650,6 +685,7 @@ protected: lhs.allocator()->allocate(); rhs.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -660,6 +696,7 @@ protected: // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); + fill(AccessorType(bias), 2); // Compute GEMM reshape_rhs.run(); @@ -668,7 +705,7 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, unsigned int m_h) + SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, unsigned int m_h) { TensorShape dst_shape = lhs_shape; dst_shape.set(0, rhs_shape[0]); @@ -679,13 +716,24 @@ protected: // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; - SimpleTensor c{ dst_shape, data_type, 1 }; + SimpleTensor bias{ dst_shape, data_type, 1 }; + + const int n = rhs_shape[0]; + const int m = lhs_shape[1]; + const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); - return reference::gemm(lhs, rhs, c, alpha, 0.0f); + SimpleTensor tmp{ bias_shape, data_type, 1 }; + fill(tmp, 2); + for(int i = 0; i < m * batch_size; i++) + { + memcpy(bias.data() + i * n, tmp.data(), n * sizeof(T)); + } + + return reference::gemm(lhs, rhs, bias, alpha, beta); } TensorType _target{}; -- cgit v1.2.1