Diffstat (limited to 'tests/validation/fixtures/GEMMFixture.h')
-rw-r--r--  tests/validation/fixtures/GEMMFixture.h | 728
1 file changed, 531 insertions(+), 197 deletions(-)
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index 500e094e18..94bedc83e1 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
-#define ARM_COMPUTE_TEST_GEMM_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/TensorShape.h"
@@ -34,6 +34,7 @@
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
 #include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
 #include "tests/validation/reference/GEMM.h"
 
 #include <random>
@@ -44,16 +45,15 @@ namespace test
 {
 namespace validation
 {
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMGenericValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false)
     {
         ARM_COMPUTE_UNUSED(pretranspose);
-        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
-        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
+        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate);
+        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate);
     }
 
 protected:
@@ -80,7 +80,7 @@ protected:
     }
 
     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
-                              DataType data_type)
+                              DataType data_type, bool accumulate=false)
     {
         // Create tensors
         TensorType a = create_tensor<TensorType>(shape_a, data_type, 1);
         TensorType b = create_tensor<TensorType>(shape_b, data_type, 1);
@@ -98,12 +98,14 @@ protected:
                        (disable_c) ? nullptr : &c,
                        &dst,
                        alpha, beta,
-                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d
-                                || reinterpret_output_as_3d)));
-        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
+                                || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate));
+        ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(c.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+        add_padding_x({ &a, &b, &c, &dst });
 
         // Allocate tensors
         a.allocator()->allocate();
@@ -111,18 +113,33 @@ protected:
         c.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(a), 0);
         fill(AccessorType(b), 1);
+        if (accumulate)
+        {
+            fill(AccessorType(dst), 6);
+        }
         if(!disable_c)
         {
             fill(AccessorType(c), 2);
         }
 
+        // Run with variable inputs.
+        if(run_twice)
+        {
+            gemm.run();
+            fill(AccessorType(a), 3); // Fill tensors with new seed after run
+            fill(AccessorType(b), 4);
+            if(!disable_c)
+            {
+                fill(AccessorType(c), 5);
+            }
+        }
+
         // Compute GEMM function
         gemm.run();
 
@@ -131,10 +148,9 @@ protected:
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
-                                      DataType data_type)
+                                      DataType data_type, bool accumulate=false)
     {
         TensorShape shape_a_to_use = shape_a;
-
         if(reinterpret_input_as_3d)
         {
             // Collapse the second and third dimension if the input is 3D
@@ -145,6 +161,7 @@ protected:
         SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
         SimpleTensor<T> b{ shape_b, data_type, 1 };
         SimpleTensor<T> c{ output_shape, data_type, 1 };
+        SimpleTensor<T> dst{ output_shape, data_type, 1 };
 
         // Fill reference
         fill(a, 0);
@@ -157,27 +174,96 @@ protected:
             const int m          = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
             const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
 
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
            {
                 memcpy(c.data() + i * n, c.data(), n * sizeof(T));
             }
         }
 
+        /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
+           therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K)
+           in order to be able to call reference implementation that works with (B x M x K) input.
+           Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */
+
+        // Define transposed shapes
+        TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
+        TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
+
+        // Define transposed tensors
+        SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+        SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+        // pretranspose a if necessary
+        if(pretranspose_a)
+        {
+            transpose_matrix<T>(a, a_transposed);
+        }
+
+        // pretranspose b if necessary
+        if(pretranspose_b)
+        {
+            transpose_matrix<T>(b, b_transposed);
+        }
+
+        // Run with variable inputs.
+        if(run_twice)
+        {
+            reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+            fill((pretranspose_a) ? a_transposed : a, 3);
+            fill((pretranspose_b) ? b_transposed : b, 4);
+            fill(c, 5);
+        }
+
+        // Do in place summation
+        if (accumulate)
+        {
+            fill(dst, 6);
+        }
+
         // Setting beta to 0 will effectively disable C for the
         // computation of the reference: alpha * A * B + 0 * C
-        return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta);
+        // Use transposed tensors if boolean enabled else use original tensors
+        if (accumulate)
+        {
+            reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst);
+            return dst;
+        }
+        else
+        {
+            return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+        }
     }
 
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    {
+        GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/);
    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    {
+        bool accumulate = true;
+        GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
 class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -224,12 +310,14 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        GEMMFunctionType gemm;
-        gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+        GEMMOperatorType gemm;
+        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        add_padding_x({ &lhs, &rhs, &bias, &dst });
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -237,10 +325,10 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -248,7 +336,11 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        gemm.run();
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -276,7 +368,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -290,11 +382,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
 class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -340,12 +431,14 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        GEMMFunctionType gemm;
-        gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+        GEMMOperatorType gemm;
+        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        add_padding_x({ &lhs, &rhs, &bias, &dst });
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -353,10 +446,10 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -364,7 +457,11 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        gemm.run();
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -392,7 +489,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -405,11 +502,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
 class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -472,16 +568,22 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeLHSFunctionType reshape_lhs;
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
-        reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
-
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ReshapeLHSOperatorType reshape_lhs;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
+        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -491,12 +593,12 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -504,9 +606,15 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_lhs.run();
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+        reshape_lhs.run(reshape_lhs_pack);
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -534,7 +642,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -548,11 +656,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
 class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
                bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -614,16 +721,22 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeLHSFunctionType reshape_lhs;
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
-        reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
-
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ReshapeLHSOperatorType reshape_lhs;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
+        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -633,12 +746,12 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -646,9 +759,15 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_lhs.run();
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+        reshape_lhs.run(reshape_lhs_pack);
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -676,7 +795,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -689,11 +808,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
 class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
                bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
     {
@@ -768,9 +886,9 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeLHSFunctionType reshape_lhs;
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
+        ReshapeLHSOperatorType reshape_lhs;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
 
         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
         validate_result = validate_result || !rhs_info.export_to_cl_image;
@@ -779,13 +897,19 @@ protected:
             return nullptr;
         }
 
-        reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -795,12 +919,12 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -808,9 +932,15 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_lhs.run();
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+        reshape_lhs.run(reshape_lhs_pack);
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -838,7 +968,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -860,11 +990,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
 class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
                bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
     {
@@ -936,9 +1065,9 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeLHSFunctionType reshape_lhs;
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
+        ReshapeLHSOperatorType reshape_lhs;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
 
         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
         validate_result = validate_result || !rhs_info.export_to_cl_image;
@@ -947,13 +1076,19 @@ protected:
             return nullptr;
         }
 
-        reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -963,12 +1098,12 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -976,9 +1111,15 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_lhs.run();
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+        reshape_lhs.run(reshape_lhs_pack);
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -1006,7 +1147,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1027,11 +1168,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
 class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs,
                bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
     {
@@ -1101,8 +1241,8 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
 
         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
         validate_result = validate_result || !rhs_info.export_to_cl_image;
@@ -1111,12 +1251,18 @@ protected:
             return nullptr;
         }
 
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -1125,11 +1271,11 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -1137,8 +1283,13 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -1166,7 +1317,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1181,11 +1332,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
 class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs,
                bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
     {
@@ -1253,8 +1403,8 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        ReshapeRHSFunctionType reshape_rhs;
-        GEMMFunctionType gemm;
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
 
         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
         validate_result = validate_result || !rhs_info.export_to_cl_image;
@@ -1263,8 +1413,8 @@ protected:
             return nullptr;
         }
 
-        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
-        gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
 
         if(has_pad_y)
         {
@@ -1273,9 +1423,15 @@ protected:
             dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
         }
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
+        if(!rhs_info.export_to_cl_image)
+        {
+            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
+        }
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -1284,11 +1440,11 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -1296,8 +1452,13 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        reshape_rhs.run();
-        gemm.run();
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -1325,7 +1486,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1339,11 +1500,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
 class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
                bool broadcast_bias, const ActivationLayerInfo &act_info)
     {
@@ -1403,12 +1563,14 @@ protected:
         kernel_info.activation_info = act_info;
 
         // Create and configure function
-        GEMMFunctionType gemm;
-        gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        GEMMOperatorType gemm;
+        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        add_padding_x({ &lhs, &rhs, &bias, &dst });
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -1416,10 +1578,10 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -1427,7 +1589,11 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        gemm.run();
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -1455,7 +1621,7 @@ protected:
 
         if(broadcast_bias)
        {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1469,11 +1635,10 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
 class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha,
               float beta, const ActivationLayerInfo &act_info)
     {
@@ -1532,12 +1697,14 @@ protected:
         // The output tensor will be auto-initialized within the function
 
         // Create and configure function
-        GEMMFunctionType gemm;
-        gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+        GEMMOperatorType gemm;
+        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
 
-        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        add_padding_x({ &lhs, &rhs, &bias, &dst });
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -1545,10 +1712,10 @@ protected:
         bias.allocator()->allocate();
         dst.allocator()->allocate();
 
-        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(lhs), 0);
@@ -1556,7 +1723,11 @@ protected:
         fill(AccessorType(bias), 2);
 
         // Compute GEMM
-        gemm.run();
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         return dst;
     }
@@ -1584,7 +1755,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1597,7 +1768,170 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
+class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
+{
+public:
+    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
+               float beta, bool broadcast_bias,
+               const ActivationLayerInfo &act_info)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0                 = n0;
+        rhs_info.k0                 = k0;
+        rhs_info.interleave         = true;
+        rhs_info.transpose          = false;
+        rhs_info.h0                 = 4;
+        rhs_info.export_to_cl_image = export_to_cl_image;
+
+        // Set the tensor shapes for LHS and RHS matrices
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+        const TensorShape bias_shape(n,
+                                     broadcast_bias ? 1 : m,
+                                     broadcast_bias ? 1 : batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+        DistributionType distribution{ T(-1.0f), T(1.0f) };
+        library->fill(tensor, distribution, i);
+
+        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
+        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+        library->fill_borders_with_garbage(tensor, distribution_inf, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+    {
+        // Create tensors
+        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
+        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
+        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
+        TensorType rhs_reshaped;
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1];
+        const unsigned int N = rhs_shape[0];
+        const unsigned int K = lhs_shape[0];
+
+        GEMMKernelInfo kernel_info;
+        kernel_info.m                       = M;
+        kernel_info.n                       = N;
+        kernel_info.k                       = K;
+        kernel_info.depth_output_gemm3d     = 0;
+        kernel_info.reinterpret_input_as_3d = false;
+        kernel_info.broadcast_bias          = broadcast_bias;
+        kernel_info.activation_info         = act_info;
+
+        // Create and configure function
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType gemm;
+
+        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+        if(!validate_result)
+        {
+            return nullptr;
+        }
+
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+        validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
+        if(!validate_result)
+        {
+            return nullptr;
+        }
+
+        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+        // Fill tensors
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+        fill(AccessorType(bias), 2);
+
+        // Compute GEMM
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+                                { ACL_SRC_1, &rhs_reshaped },
+                                { ACL_SRC_2, &bias },
+                                { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
+                                      const ActivationLayerInfo &act_info)
+    {
+        if(!validate_result)
+            return SimpleTensor<T>();
+
+        TensorShape dst_shape = lhs_shape;
+        dst_shape[0]          = rhs_shape[0];
+        dst_shape[1]          = lhs_shape[1];
+
+        // Create reference
+        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+        SimpleTensor<T> bias{ dst_shape, data_type, 1 };
+
+        const int n          = rhs_shape[0];
+        const int m          = lhs_shape[1];
+        const int batch_size = lhs_shape[2];
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+        fill(bias, 2);
+
+        if(broadcast_bias)
+        {
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
+            for(int i = 1; i < m * batch_size; i++)
+            {
+                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
+            }
+        }
+
+        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
+    }
+
+    bool            validate_result = true;
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
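
A note on the accumulate path exercised by GEMMAccumulateValidationFixture above: the destination is seeded first (fill(AccessorType(dst), 6)) and the GEMM result is expected to be added on top of it. Below is a minimal sketch of the semantics being validated, written against plain row-major buffers under the assumption that reference::gemm_accumulate behaves as dst += alpha * (A @ B) + beta * C; the helper name and layout are illustrative, not the library's reference implementation.

    #include <cstddef>

    // Illustrative only: D += alpha * (A @ B) + beta * C for row-major
    // M x K (a), K x N (b) and M x N (c, dst) buffers.
    template <typename T>
    void gemm_accumulate_sketch(const T *a, const T *b, const T *c,
                                T alpha, T beta, T *dst,
                                std::size_t m, std::size_t n, std::size_t k)
    {
        for(std::size_t i = 0; i < m; ++i)
        {
            for(std::size_t j = 0; j < n; ++j)
            {
                T acc{};
                for(std::size_t p = 0; p < k; ++p)
                {
                    acc += a[i * k + p] * b[p * n + j];
                }
                // Accumulate on top of the pre-seeded destination instead of overwriting it
                dst[i * n + j] += alpha * acc + beta * c[i * n + j];
            }
        }
    }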
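
The migration from GEMMFunctionType to GEMMOperatorType changes the calling convention throughout the file: operators are configured on ITensorInfo pointers and receive the concrete tensors at each run through an ITensorPack. A reduced sketch of that pattern follows; OperatorType stands in for the fixture's GEMMOperatorType template parameter (the concrete operator is supplied by the instantiating test suite), and the include paths are assumed from the library's public headers.

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/experimental/Types.h"

    using namespace arm_compute;

    template <typename OperatorType>
    void run_gemm_operator(OperatorType &gemm, ITensor &lhs, ITensor &rhs, ITensor &bias, ITensor &dst)
    {
        // Tensors are bound per invocation rather than at configure() time,
        // which is what lets the fixtures re-run one configured operator on
        // freshly filled inputs (see the run_twice paths above).
        ITensorPack pack{ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } };
        gemm.run(pack);
    }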
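
The pretranspose_a/pretranspose_b handling in GEMMGenericValidationFixture::compute_reference reduces to a plain 2D transpose before calling the (M x K)-based reference. A stand-alone sketch of that step for a single batch (illustrative; the fixture itself uses the test framework's transpose_matrix<T>):

    #include <cstddef>

    // src is rows x cols row-major; dst becomes cols x rows.
    template <typename T>
    void transpose_2d(const T *src, T *dst, std::size_t rows, std::size_t cols)
    {
        for(std::size_t r = 0; r < rows; ++r)
        {
            for(std::size_t c = 0; c < cols; ++c)
            {
                dst[c * rows + r] = src[r * cols + c];
            }
        }
    }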
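
Why the MMUL fixture's fill() pushes infinity into the tensor borders: if a kernel mistakenly reads a padded element and multiplies it by a zero weight, the result is NaN (inf * 0 is NaN under IEEE 754), and the comparison against the reference then fails. A self-contained illustration:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main()
    {
        const float padded_garbage = std::numeric_limits<float>::infinity();
        assert(std::isnan(padded_garbage * 0.0f)); // inf * 0 -> NaN, flagged by validation
        return 0;
    }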