From bf9731edfa0439cad4d70efc3065e71e199c62b8 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 12 Dec 2018 10:18:04 +0000 Subject: COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1 The current implementation is limited just to FP32 Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244 Reviewed-on: https://review.mlplatform.org/389 Reviewed-by: Anthony Barbier Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- tests/framework/Macros.h | 11 +- tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp | 224 +++++++++++++++++++++ tests/validation/CL/Im2Col.cpp | 1 + tests/validation/fixtures/GEMMFixture.h | 111 ++++++++++ 4 files changed, 343 insertions(+), 4 deletions(-) create mode 100644 tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp (limited to 'tests') diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h index deca1ef51a..591b80e9d8 100644 --- a/tests/framework/Macros.h +++ b/tests/framework/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,8 +49,8 @@ #define CONCAT(ARG0, ARG1) ARG0##ARG1 -#define VARIADIC_SIZE_IMPL(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, size, ...) size -#define VARIADIC_SIZE(...) VARIADIC_SIZE_IMPL(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define VARIADIC_SIZE_IMPL(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, size, ...) size +#define VARIADIC_SIZE(...) VARIADIC_SIZE_IMPL(__VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #define JOIN_PARAM1(OP, param) OP(0, param) #define JOIN_PARAM2(OP, param, ...) \ @@ -80,6 +80,9 @@ #define JOIN_PARAM10(OP, param, ...) \ OP(9, param) \ , JOIN_PARAM9(OP, __VA_ARGS__) +#define JOIN_PARAM11(OP, param, ...) \ + OP(10, param) \ + , JOIN_PARAM10(OP, __VA_ARGS__) #define JOIN_PARAM(OP, NUM, ...) 
\ CONCAT(JOIN_PARAM, NUM) \ (OP, __VA_ARGS__) @@ -264,4 +267,4 @@ // // TEST CASE MACROS END // -#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */ +#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */ \ No newline at end of file diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp new file mode 100644 index 0000000000..e2fa194765 --- /dev/null +++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "tests/CL/CLAccessor.h" +#include "tests/CL/Helper.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +// *INDENT-OFF* +// clang-format off +RelativeTolerance tolerance_f32(0.001f); +constexpr float abs_tolerance_f32(0.0001f); + +/** M values to test */ +const auto m_values = framework::dataset::make("M", 37); + +/** N values to test */ +const auto n_values = framework::dataset::make("N", 51); + +/** K values to test */ +const auto k_values = framework::dataset::make("K", 43); + +/** Batch size values to test */ +const auto b_values = framework::dataset::make("batch_size", 1, 3); + +/** M0 values to test - Precommit */ +const auto m0_values_precommit = framework::dataset::make("M0", {4, 5, 6}); + +/** N0 values to test - Precommit */ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4, 8 }); + +/** K0 values to test - Precommit */ +const auto k0_values_precommit = framework::dataset::make("K0", { 4, 8 }); + +/** V0 values to test - Precommit */ +const auto v0_values_precommit = framework::dataset::make("V0", 1, 3); + +/** H0 values to test - Precommit */ +const auto h0_values_precommit = framework::dataset::make("H0", 1, 3); + +/** M0 values to test - Nightly */ +const auto m0_values_nightly = 
framework::dataset::make("M0", 2, 8); + +/** N0 values to test - Nightly */ +const auto n0_values_nightly = framework::dataset::make("N0", { 2, 4, 8, 16 }); + +/** K0 values to test - Nightly */ +const auto k0_values_nightly = framework::dataset::make("K0", { 4, 8, 16 }); + +/** V0 values to test - Nightly */ +const auto v0_values_nightly = framework::dataset::make("V0", 1, 4); + +/** H0 values to test - Nightly */ +const auto h0_values_nightly = framework::dataset::make("H0", 1, 4); + +/** Interleave values to test with LHS matrix */ +const auto i_values_lhs = framework::dataset::make("interleave_lhs", { true, false }); + +/** Interleave values to test with RHS matrix */ +const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, false }); + +} // namespace + +using namespace arm_compute::misc::shape_calculator; + +// Create function for CLGEMMReshapeLHSMatrixKernel +using CLGEMMReshapeLHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder; + +// Create function for CLGEMMReshapeRHSMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder; + +// Create function for CLGEMMMatrixMultiplyReshapedKernel +using CLGEMMMatrixMultiplyReshaped = CLSynthetizeFunction; + +// Fixture for CLGEMMMatrixMultiplyReshaped +using CLGEMMMatrixMultiplyReshapedFixture = GEMMMatrixMultiplyReshapedValidationFixture; + +TEST_SUITE(CL) +TEST_SUITE(GEMMMatrixMultiplyReshaped) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + framework::dataset::make("batch_size", 1)), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + v0_values_precommit), + h0_values_precommit), + i_values_lhs), + i_values_rhs), +m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, v0_value, h0_value, i_value_lhs, i_value_rhs) +{ + const unsigned int M = m_value; + const 
unsigned int N = n_value; + const unsigned int K = k_value; + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = m0_value; + lhs_info.k0 = k0_value; + lhs_info.v0 = v0_value; + lhs_info.interleave = i_value_lhs; + lhs_info.transpose = false; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = n0_value; + rhs_info.k0 = k0_value; + rhs_info.h0 = h0_value; + rhs_info.interleave = i_value_rhs; + rhs_info.transpose = true; + + GEMMReshapeInfo gemm_info(M, N, K); + + const TensorShape lhs_shape(K, M, b_value); + const TensorShape lhs_shape_reshaped = compute_lhs_reshaped_shape(TensorInfo(lhs_shape, 1, DataType::F32), + lhs_info, + false); + + const TensorShape rhs_shape(N, K, b_value); + const TensorShape rhs_shape_reshaped = compute_rhs_reshaped_shape(TensorInfo(rhs_shape, 1, DataType::F32), + rhs_info); + + const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape_reshaped, 1, DataType::F32), + TensorInfo(rhs_shape_reshaped, 1, DataType::F32), + gemm_info); + + // Create tensors + CLTensor lhs_reshaped = create_tensor(lhs_shape_reshaped, DataType::F32); + CLTensor rhs_reshaped = create_tensor(rhs_shape_reshaped, DataType::F32); + CLTensor dst = create_tensor(dst_shape, DataType::F32); + + ARM_COMPUTE_EXPECT(lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLGEMMMatrixMultiplyReshaped gemm; + gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, 1.0f, lhs_info, rhs_info, gemm_info); +} + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + v0_values_precommit), + 
h0_values_precommit), + i_values_lhs), + i_values_rhs)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_nightly), + n0_values_nightly), + k0_values_nightly), + v0_values_nightly), + h0_values_nightly), + i_values_lhs), + i_values_rhs)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // GEMMMatrixMultiplyReshaped +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute \ No newline at end of file diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp index ebf2331e5e..432b3b239a 100644 --- a/tests/validation/CL/Im2Col.cpp +++ b/tests/validation/CL/Im2Col.cpp @@ -49,6 +49,7 @@ const auto conv_filter_sizes = framework::dataset::make("KernelDims", { Size2D(3 Size2D(1U, 3U), Size2D(5U, 3U), Size2D(1U, 1U), + Size2D(9U, 9U), Size2D(11U, 11U)} ); const auto padstrides = framework::dataset::make("PadStride", { PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(1U, 1U, 1U, 1U), diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index 0083abffb5..ce2b177ce9 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -151,6 +151,117 @@ protected: SimpleTensor _reference{}; }; +template +class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture +{ +public: + template + void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, + bool
interleave_rhs) + { + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = m0; + lhs_info.k0 = k0; + lhs_info.v0 = v0; + lhs_info.interleave = interleave_lhs; + lhs_info.transpose = false; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = n0; + rhs_info.k0 = k0; + rhs_info.h0 = h0; + rhs_info.interleave = interleave_rhs; + rhs_info.transpose = true; + + // Set the tensor shapes for LHS and RHS matrices + const TensorShape lhs_shape(k, m, batch_size); + const TensorShape rhs_shape(n, k, batch_size); + + _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info); + _reference = compute_reference(lhs_shape, rhs_shape); + } + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info) + { + // Create tensors + TensorType lhs = create_tensor(lhs_shape, DataType::F32, 1); + TensorType rhs = create_tensor(rhs_shape, DataType::F32, 1); + TensorType lhs_reshaped; + TensorType rhs_reshaped; + TensorType dst; + + const unsigned int M = lhs_shape[1]; + const unsigned int N = rhs_shape[0]; + const unsigned int K = lhs_shape[0]; + + // The output tensor will be auto-initialized within the function + + // Create and configure function + ReshapeLHSFunctionType reshape_lhs; + ReshapeRHSFunctionType reshape_rhs; + GEMMFunctionType gemm; + reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); + reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); + gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, 1.0f, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); + + ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + 
lhs_reshaped.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(lhs), 0); + fill(AccessorType(rhs), 1); + + // Compute GEMM + reshape_lhs.run(); + reshape_rhs.run(); + gemm.run(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape) + { + TensorShape dst_shape = lhs_shape; + dst_shape[0] = rhs_shape[0]; + dst_shape[1] = lhs_shape[1]; + + // Create reference + SimpleTensor lhs{ lhs_shape, DataType::F32, 1 }; + SimpleTensor rhs{ rhs_shape, DataType::F32, 1 }; + SimpleTensor c{ dst_shape, DataType::F32, 1 }; + + // Fill reference + fill(lhs, 0); + fill(rhs, 1); + fill(c, 2); + + return reference::gemm(lhs, rhs, c, 1.0f, 0.0f); + } + + TensorType _target{}; + SimpleTensor _reference{}; +}; } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1