/* * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H #define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/GEMM.h" #include namespace arm_compute { namespace test { namespace validation { /** Generic GEMM validation fixture: setup() stores the result of the function under test in _target and the result of the reference implementation in _reference. NOTE(review): disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b and run_twice are used below but not declared in the visible text; presumably they are template parameters whose list is missing here - confirm against the original header. */ template class GEMMGenericValidationFixture : public framework::Fixture { public: /** Runs compute_target and compute_reference with the same shapes, alpha, beta, data type and accumulate flag; pretranspose is explicitly marked unused. */ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false) { ARM_COMPUTE_UNUSED(pretranspose); _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate); } protected: /** Fills tensor using seed i: F16 and F32 use a uniform real distribution built from lo and hi, every other data type falls back to library->fill_tensor_uniform. */ template void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) { switch(tensor.data_type()) { case DataType::F16: { arm_compute::utils::uniform_real_distribution_16bit distribution{ float(lo), float(hi) }; library->fill(tensor, distribution, i); break; } case DataType::F32: { std::uniform_real_distribution distribution(lo, hi); library->fill(tensor, distribution, i); break; } default: library->fill_tensor_uniform(tensor, i); } } /** Creates, configures, allocates and fills the target tensors, runs the function under test (twice when run_twice is set) and returns dst. */ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta, DataType data_type, bool accumulate=false) { // Create tensors TensorType a = create_tensor(shape_a, data_type, 1); TensorType b = create_tensor(shape_b, data_type, 1); TensorType c = 
create_tensor(shape_c, data_type, 1); TensorType dst = create_tensor(output_shape, data_type, 1); // Create and configure function FunctionType gemm; // The GEMMinfo includes the values of the depth in case of reinterpreted 3d output. // If the output shape has the same number of dimensions of the input the method called is a 2D matrix multiplication (depth_output_reinterpreted_as_3D = 0), // in the other case we have to use the reinterpreted version of GEMM (depth_output_reinterpreted_as_3D = depth of the 3D output). /* c is passed as nullptr when disable_c is set */ gemm.configure(&a, &b, (disable_c) ? nullptr : &c, &dst, alpha, beta, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate)); ARM_COMPUTE_ASSERT(a.info()->is_resizable()); ARM_COMPUTE_ASSERT(b.info()->is_resizable()); ARM_COMPUTE_ASSERT(c.info()->is_resizable()); ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); add_padding_x({ &a, &b, &c, &dst }); // Allocate tensors a.allocator()->allocate(); b.allocator()->allocate(); c.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(a), 0); fill(AccessorType(b), 1); /* When accumulating, dst is pre-filled with seed 6; compute_reference fills its dst with the same seed */ if (accumulate) { fill(AccessorType(dst), 6); } if(!disable_c) { fill(AccessorType(c), 2); } // Run with variable inputs. 
if(run_twice) { gemm.run(); fill(AccessorType(a), 3); // Fill tensors with new seed after run fill(AccessorType(b), 4); if(!disable_c) { fill(AccessorType(c), 5); } } // Compute GEMM function gemm.run(); return dst; } /** Computes the expected result with reference::gemm, or reference::gemm_accumulate when accumulate is set, using the same fill seeds as compute_target. */ SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta, DataType data_type, bool accumulate=false) { TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) { // Collapse the second and third dimension if the input is 3D shape_a_to_use.collapse(2U, 1U); } // Create reference SimpleTensor a{ shape_a_to_use, data_type, 1 }; SimpleTensor b{ shape_b, data_type, 1 }; SimpleTensor c{ output_shape, data_type, 1 }; SimpleTensor dst{ output_shape, data_type, 1 }; // Fill reference fill(a, 0); fill(b, 1); fill(c, 2); /* c is created with the output shape; for the 3D cases its first n elements are replicated over the remaining m * batch_size - 1 rows */ if(reinterpret_input_as_3d || reinterpret_output_as_3d) { const int n = shape_b[0]; const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1]; const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2]; // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(c.data() + i * n, c.data(), n * sizeof(T)); } } /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) in order to be able to call reference implementation that works with (B x M x K) input. Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. 
*/ // Define transposed shapes TensorShape a_transposed_shape(a.shape().y(), a.shape().x()); TensorShape b_transposed_shape(b.shape().y(), b.shape().x()); // Define transposed tensors SimpleTensor a_transposed{ a_transposed_shape, data_type }; SimpleTensor b_transposed{ b_transposed_shape, data_type }; // pretranspose a if necessary if(pretranspose_a) { transpose_matrix(a, a_transposed); } // pretranspose b if necessary if(pretranspose_b) { transpose_matrix(b, b_transposed); } // Run with variable inputs. if(run_twice) { /* The return value of this first reference run is discarded; the operands are then refilled with seeds 3, 4 and 5 */ reference::gemm((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta); fill((pretranspose_a) ? a_transposed : a, 3); fill((pretranspose_b) ? b_transposed : b, 4); fill(c, 5); } // Do in place summation if (accumulate) { fill(dst, 6); } // Setting beta to 0 will effectively disable C for the // computation of the reference: alpha * A * B + 0 * C // Use transposed tensors if boolean enabled else use original tensors if (accumulate) { reference::gemm_accumulate((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst); return dst; } else { return reference::gemm((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 
0.f : beta); } } /** Result of the function under test (_target) and expected result (_reference). */ TensorType _target{}; SimpleTensor _reference{}; }; /** GEMM fixture whose setup() forwards to GEMMGenericValidationFixture::setup with accumulate = false. */ template class GEMMValidationFixture : protected GEMMGenericValidationFixture { public: void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) { GEMMGenericValidationFixture::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/); } }; /** GEMM fixture whose setup() forwards to GEMMGenericValidationFixture::setup with accumulate = true. */ template class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture { public: void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) { bool accumulate = true; GEMMGenericValidationFixture::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate); } }; /** Validation fixture that runs a GEMM operator directly on the LHS, RHS and bias tensors and compares against reference::gemm followed by reference::activation_layer. */ template class GEMMMatrixMultiplyValidationFixture : public framework::Fixture { public: /** Builds LHS (k, m, batch_size), RHS (n, k, batch_size) and bias shapes, then computes _target and _reference. */ void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 
1 : batch_size); _target = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch); _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); } protected: /** Fills tensor with a uniform distribution built from T(-1.0f) and T(1.0f) using seed i, then fills its borders from a distribution whose two bounds are +infinity. */ template void fill(U &&tensor, int i) { static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; library->fill_borders_with_garbage(tensor, distribution_inf, i); } /** Configures the operator on the tensor infos (dst is a default-constructed TensorType), allocates the tensors, fills lhs, rhs and bias with seeds 0, 1 and 2, runs the operator through an ITensorPack and returns dst. */ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType dst; const unsigned int m = lhs_shape[1]; const unsigned int n = rhs_shape[0]; const unsigned int k = lhs_shape[0]; GEMMReshapeInfo reshape_info(m, n, k, 1, 1, 0, false, broadcast_bias); // The output tensor will be auto-initialized within the function // Create and configure function GEMMOperatorType gemm; gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); 
ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); // Compute GEMM ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } /** Expected result: reference::gemm(lhs, rhs, bias, alpha, beta) passed through reference::activation_layer. The bias is created with the destination shape; when broadcast_bias is set its first n elements are copied over every following row. */ SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape[0] = rhs_shape[0]; dst_shape[1] = lhs_shape[1]; // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); fill(bias, 2); if(broadcast_bias) { // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); } } return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); } /** Result of the function under test (_target) and expected result (_reference). */ TensorType _target{}; SimpleTensor _reference{}; }; /** Variant of the matrix-multiply fixture where m = m_w * m_h, the bias shape is (n, 1, 1) and the reference destination shape is (n, m / m_h, m_h, batch_size). */ template class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture { public: /** Computes _target and _reference; broadcast_bias is accepted but marked unused. */ void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { 
ARM_COMPUTE_UNUSED(broadcast_bias); // In case of GEMM3D, m is the product between m_w and m_h const unsigned int m = m_w * m_h; // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); const TensorShape bias_shape(n, 1, 1); _target = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch); _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info); } protected: /** Fills tensor with a uniform distribution built from T(-1.0f) and T(1.0f) using seed i. */ template void fill(U &&tensor, int i) { static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); } /** Configures the operator with GEMMReshapeInfo(m, n, k, 1, 1, m_h, false, true), allocates the tensors, fills lhs, rhs and bias with seeds 0, 1 and 2, runs the operator and returns dst. */ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, unsigned int m_h, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType dst; const unsigned int m = lhs_shape[1]; const unsigned int n = rhs_shape[0]; const unsigned int k = lhs_shape[0]; GEMMReshapeInfo reshape_info(m, n, k, 1, 1, m_h, false, true); // The output tensor will be auto-initialized within the function // Create and configure function GEMMOperatorType gemm; gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); 
ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); // Compute GEMM ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } /** Expected result: the bias has shape (n, lhs_shape[1] / m_h, m_h, lhs_shape[2]) and its first n elements are copied over all following rows before calling reference::gemm and reference::activation_layer. */ SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape.set(0, rhs_shape[0]); dst_shape.set(1, lhs_shape[1] / m_h); dst_shape.set(2, m_h); dst_shape.set(3, lhs_shape[2]); // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); fill(bias, 2); // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); } return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); } /** Result of the function under test (_target) and expected result (_reference). */ TensorType _target{}; SimpleTensor _reference{}; }; /** Validation fixture that first reshapes LHS and RHS with dedicated reshape operators and then runs the GEMM operator on the reshaped tensors. */ template class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture { public: void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision, const 
ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { /* LHS reshape uses fixed m0 = k0 = 4 with interleave and transpose enabled; RHS uses n0 = 16 / sizeof(T) and k0 = 1 with both disabled; v0 and h0 come from the caller */ GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = 4; lhs_info.k0 = 4; lhs_info.v0 = v0; lhs_info.interleave = true; lhs_info.transpose = true; GEMMRHSMatrixInfo rhs_info; rhs_info.n0 = 16 / sizeof(T); rhs_info.k0 = 1; rhs_info.h0 = h0; rhs_info.interleave = false; rhs_info.transpose = false; // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 1 : batch_size); _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch); _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); } protected: /** Fills tensor with a uniform distribution built from T(-1.0f) and T(1.0f) using seed i, then fills its borders from a distribution whose two bounds are +infinity. */ template void fill(U &&tensor, int i) { static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); // Fill border with infinity in order to check the presence of NaN values (i.e. 
inf * 0) DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; library->fill_borders_with_garbage(tensor, distribution_inf, i); } /** Configures both reshape operators and the GEMM operator, allocates and fills the tensors, runs reshape_lhs, reshape_rhs and gemm in that order and returns dst. */ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType lhs_reshaped; TensorType rhs_reshaped; TensorType dst; const unsigned int m = lhs_shape[1]; const unsigned int n = rhs_shape[0]; const unsigned int k = lhs_shape[0]; GEMMReshapeInfo reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, 0, false, broadcast_bias); // The output tensor will be auto-initialized within the function // Create and configure function ReshapeLHSOperatorType reshape_lhs; ReshapeRHSOperatorType reshape_rhs; GEMMOperatorType gemm; reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); // We do not pad when using image as it needs to comply to strict pitch alignment restrictions if(!rhs_info.export_to_cl_image) { add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); } // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); lhs_reshaped.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); 
bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); // Compute GEMM ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; reshape_lhs.run(reshape_lhs_pack); ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; reshape_rhs.run(reshape_rhs_pack); ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } /** Expected result: reference::gemm(lhs, rhs, bias, alpha, beta) passed through reference::activation_layer. The bias is created with the destination shape; when broadcast_bias is set its first n elements are copied over every following row. */ SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape[0] = rhs_shape[0]; dst_shape[1] = lhs_shape[1]; // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); fill(bias, 2); if(broadcast_bias) { // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); } } return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); } /** Result of the function under test (_target) and expected result (_reference). */ TensorType _target{}; SimpleTensor _reference{}; }; /** 3D-output variant of the interleaved/transposed fixture: m = m_w * m_h, the bias shape is (n, 1, 1) and m_h is forwarded to GEMMReshapeInfo. */ template class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture { public: void setup(unsigned int m_w, unsigned int m_h, 
unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { /* broadcast_bias is accepted but marked unused; the bias shape below is always (n, 1, 1) */ ARM_COMPUTE_UNUSED(broadcast_bias); /* LHS reshape uses fixed m0 = k0 = 4 with interleave and transpose enabled; RHS uses n0 = 16 / sizeof(T) and k0 = 1 with both disabled; v0 and h0 come from the caller */ GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = 4; lhs_info.k0 = 4; lhs_info.v0 = v0; lhs_info.interleave = true; lhs_info.transpose = true; GEMMRHSMatrixInfo rhs_info; rhs_info.n0 = 16 / sizeof(T); rhs_info.k0 = 1; rhs_info.h0 = h0; rhs_info.interleave = false; rhs_info.transpose = false; // In case of GEMM3D, m is the product between m_w and m_h const unsigned int m = m_w * m_h; // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); const TensorShape bias_shape(n, 1, 1); _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch); _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info); } protected: /** Fills tensor with a uniform distribution built from T(-1.0f) and T(1.0f) using seed i. */ template void fill(U &&tensor, int i) { static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); } /** Configures both reshape operators and the GEMM operator, allocates and fills the tensors, runs reshape_lhs, reshape_rhs and gemm in that order and returns dst. */ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, unsigned int m_h, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = 
create_tensor(bias_shape, data_type, 1); TensorType lhs_reshaped; TensorType rhs_reshaped; TensorType dst; const unsigned int m = lhs_shape[1]; const unsigned int n = rhs_shape[0]; const unsigned int k = lhs_shape[0]; /* m_h is passed as the sixth GEMMReshapeInfo argument and the last argument is fixed to true */ GEMMReshapeInfo reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, m_h, false, true); // The output tensor will be auto-initialized within the function // Create and configure function ReshapeLHSOperatorType reshape_lhs; ReshapeRHSOperatorType reshape_rhs; GEMMOperatorType gemm; reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); // We do not pad when using image as it needs to comply to strict pitch alignment restrictions if(!rhs_info.export_to_cl_image) { add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); } // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); lhs_reshaped.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); // Compute GEMM ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; reshape_lhs.run(reshape_lhs_pack); ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; 
reshape_rhs.run(reshape_rhs_pack); ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } /** Expected result: the bias has shape (n, lhs_shape[1] / m_h, m_h, lhs_shape[2]) and its first n elements are copied over all following rows before calling reference::gemm and reference::activation_layer. */ SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape.set(0, rhs_shape[0]); dst_shape.set(1, lhs_shape[1] / m_h); dst_shape.set(2, m_h); dst_shape.set(3, lhs_shape[2]); // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); fill(bias, 2); // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); } return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); } /** Result of the function under test (_target) and expected result (_reference). */ TensorType _target{}; SimpleTensor _reference{}; }; /** Validation fixture for a GEMM operator fed with reshaped LHS and RHS whose block sizes, interleave/transpose flags and export_to_cl_image come from the caller; the run is skipped when the RHS reshape cannot be validated with export_to_cl_image set. */ template class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture { public: void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info) { GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = m0; lhs_info.k0 = k0; lhs_info.v0 = v0; lhs_info.interleave = interleave_lhs; lhs_info.transpose = lhs_transpose; /* RHS shares k0 with the LHS and its transpose flag is always the negation of lhs_transpose */ GEMMRHSMatrixInfo rhs_info; rhs_info.n0 = n0; rhs_info.k0 = k0; rhs_info.h0 = h0; rhs_info.interleave = interleave_rhs; rhs_info.transpose = !lhs_transpose; rhs_info.export_to_cl_image = 
export_to_cl_image; // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 1 : batch_size); _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info); if(validate_result) { _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); } } protected: template void fill(U &&tensor, int i) { static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); // Fill border with infinity in order to check the presence of NaN values (i.e. 
inf * 0) DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; library->fill_borders_with_garbage(tensor, distribution_inf, i); } TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType lhs_reshaped; TensorType rhs_reshaped; TensorType dst; const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; const unsigned int K = lhs_shape[0]; GEMMKernelInfo kernel_info; kernel_info.m = M; kernel_info.n = N; kernel_info.k = K; kernel_info.depth_output_gemm3d = 0; kernel_info.reinterpret_input_as_3d = false; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = act_info; kernel_info.fp_mixed_precision = fp_mixed_precision; // The output tensor will be auto-initialized within the function // Create and configure function ReshapeLHSOperatorType reshape_lhs; ReshapeRHSOperatorType reshape_rhs; GEMMOperatorType gemm; validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); validate_result = validate_result || !rhs_info.export_to_cl_image; if(!validate_result) { return nullptr; } reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); // We do not pad when using image as it needs to 
comply to strict pitch alignment restrictions if(!rhs_info.export_to_cl_image) { add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); } // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); lhs_reshaped.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); // Compute GEMM ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; reshape_lhs.run(reshape_lhs_pack); ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; reshape_rhs.run(reshape_rhs_pack); ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape[0] = rhs_shape[0]; dst_shape[1] = lhs_shape[1]; // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference fill(lhs, 0); fill(rhs, 1); fill(bias, 2); if(broadcast_bias) { // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * 
sizeof(T));
            }
        }

        // Select the reference GEMM variant matching the fp_mixed_precision setting
        if(fp_mixed_precision)
        {
            return reference::activation_layer(reference::gemm_mixed_precision(lhs, rhs, bias, alpha, beta), act_info);
        }
        else
        {
            return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
        }
    }

    bool validate_result = true; // Cleared by compute_target() when validation fails
    TensorType _target{};
    SimpleTensor _reference{};
};

/** Fixture for the reshaped GEMM matrix multiplication with a 3D output.
 *
 * setup() builds the LHS/RHS matrix infos and tensor shapes, runs the target operators through
 * compute_target() and, when validate_result is still true, computes the reference result.
 * Results are stored in _target and _reference.
 *
 * NOTE(review): the template parameter list after `template` is not visible in this copy of the
 * file. The body uses T, TensorType, AccessorType, ReshapeLHSOperatorType, ReshapeRHSOperatorType,
 * GEMMOperatorType and fp_mixed_precision as if they were template parameters; confirm against the
 * original header.
 */
template class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0, k0, v0, interleave_lhs and lhs_transpose are copied into the GEMMLHSMatrixInfo.
     * n0, k0, h0, interleave_rhs and export_to_cl_image are copied into the GEMMRHSMatrixInfo,
     * whose transpose flag is set to the negation of lhs_transpose.
     * The LHS has shape (k, m_w * m_h, batch_size), the RHS (n, k, batch_size) and the bias (n, 1, 1).
     * m_h is passed on to compute_target() and compute_reference() as the 3D output depth.
     */
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;
        lhs_info.v0 = v0;
        lhs_info.interleave = interleave_lhs;
        lhs_info.transpose = lhs_transpose;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = !lhs_transpose;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
        // The reference is only computed when compute_target() did not clear validate_result
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
        }
    }

protected:
    /** Fill a tensor through the assets library using a uniform real distribution built with bounds -1 and 1.
     *
     * @param[in,out] tensor Tensor or accessor to fill.
     * @param[in]     i      Value forwarded unchanged to library->fill().
     */
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    /** Build the target result: reshape LHS and RHS, then run the GEMM operator with a 3D output.
     *
     * The kernel info uses depth_output_gemm3d = m_h and always sets broadcast_bias to true.
     * validate_result receives the result of the RHS reshape validation; a failed validation is
     * ignored unless rhs_info.export_to_cl_image is set. When validate_result ends up false the
     * function returns `nullptr` before anything is configured, allocated or run.
     *
     * @return The destination tensor after the GEMM run.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = true;
        kernel_info.activation_info = act_info;
        kernel_info.fp_mixed_precision = fp_mixed_precision;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        // A failed RHS reshape validation only counts when the RHS is exported to a CL image
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            // Early out: nothing has been configured or allocated yet
            return nullptr;
        }

        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        // The inputs must still be resizable before allocation
        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM: both reshapes run first, then the GEMM consumes the reshaped operands
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * The destination shape is (rhs_shape[0], lhs_shape[1] / m_h, m_h, lhs_shape[2]). The bias is created
     * with that shape and its first row of n elements is always copied over the following rows.
     * fp_mixed_precision selects between reference::gemm_mixed_precision and reference::gemm.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        if(fp_mixed_precision)
        {
            return reference::activation_layer(reference::gemm_mixed_precision(lhs, rhs, bias, alpha, beta), act_info);
        }
        else
        {
            return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
        }
    }

    bool validate_result = true; // Cleared by compute_target() when validation fails
    TensorType _target{};
    SimpleTensor _reference{};
};

/** Fixture for the GEMM matrix multiplication where only the RHS matrix is reshaped.
 *
 * NOTE(review): the template parameter list after `template` is not visible in this copy of the
 * file; confirm against the original header.
 */
template class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0 and k0 are copied into the GEMMLHSMatrixInfo; n0, k0, h0, interleave_rhs, transpose_rhs and
     * export_to_cl_image into the GEMMRHSMatrixInfo. The LHS has shape (k, m, batch_size) and the RHS
     * (n, k, batch_size). The bias has shape (n, 1, 1) when broadcast_bias is set, otherwise (n, m, batch_size).
     */
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        // The reference is only computed when compute_target() did not clear validate_result
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
        }
    }

protected:
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e.
inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    /** Build the target result: reshape the RHS only, then run the GEMM operator on lhs and the reshaped RHS.
     *
     * validate_result receives the result of the RHS reshape validation; a failed validation is
     * ignored unless rhs_info.export_to_cl_image is set. When validate_result ends up false the
     * function returns `nullptr` before anything is configured, allocated or run.
     *
     * @param[in] lhs_shape      Shape of the LHS tensor; dimension 0 is used as K and dimension 1 as M.
     * @param[in] rhs_shape      Shape of the RHS tensor; dimension 0 is used as N.
     * @param[in] bias_shape     Shape of the bias tensor.
     * @param[in] lhs_info       Forwarded to the GEMM operator.
     * @param[in] rhs_info       Forwarded to the RHS reshape (validate/configure) and to the GEMM operator.
     * @param[in] data_type      Data type used to create lhs, rhs and bias.
     * @param[in] alpha          Forwarded to the GEMM operator.
     * @param[in] beta           Forwarded to the GEMM operator.
     * @param[in] broadcast_bias Stored in GEMMKernelInfo::broadcast_bias.
     * @param[in] act_info       Stored in GEMMKernelInfo::activation_info.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        // A failed RHS reshape validation only counts when the RHS is exported to a CL image
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            // Early out: nothing has been configured or allocated yet
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        // The inputs must still be resizable before allocation
        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM: the RHS reshape runs first, then the GEMM consumes lhs and the reshaped RHS
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * The bias is created with the destination shape (rhs_shape[0], lhs_shape[1], remaining LHS dimensions).
     * When broadcast_bias is set, its first row of n elements is copied over every following row.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true; // Cleared by compute_target() when validation fails
    TensorType _target{};
    SimpleTensor _reference{};
};

/** Fixture for the GEMM matrix multiplication with only the RHS reshaped and a 3D output.
 *
 * setup() builds the LHS/RHS matrix infos and tensor shapes, runs the target operators through
 * compute_target() and, when validate_result is still true, computes the reference result.
 * Results are stored in _target and _reference.
 *
 * NOTE(review): the template parameter list after `template` is not visible in this copy of the
 * file. The body uses T, TensorType, AccessorType, ReshapeRHSOperatorType and GEMMOperatorType as
 * if they were template parameters; confirm against the original header.
 */
template class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0 and k0 are copied into the GEMMLHSMatrixInfo; n0, k0, h0, interleave_rhs, transpose_rhs and
     * export_to_cl_image into the GEMMRHSMatrixInfo. The LHS has shape (k, m_w * m_h, batch_size),
     * the RHS (n, k, batch_size) and the bias (n, 1, 1). m_h is passed on as the 3D output depth and
     * has_pad_y is forwarded to compute_target().
     */
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info, has_pad_y);
        // The reference is only computed when compute_target() did not clear validate_result
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
        }
    }

protected:
    /** Fill a tensor through the assets library using a uniform real distribution built with bounds -1 and 1.
     *
     * @param[in,out] tensor Tensor or accessor to fill.
     * @param[in]     i      Value forwarded unchanged to library->fill().
     */
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    /** Build the target result: reshape the RHS only, then run the GEMM operator with a 3D output.
     *
     * The kernel info uses depth_output_gemm3d = m_h, always sets broadcast_bias to true and carries
     * has_pad_y. When has_pad_y is set, extra padding is added to lhs and dst after configuration.
     * validate_result receives the result of the RHS reshape validation; a failed validation is
     * ignored unless rhs_info.export_to_cl_image is set. When validate_result ends up false the
     * function returns `nullptr` before anything is configured, allocated or run.
     *
     * @return The destination tensor after the GEMM run.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info, bool has_pad_y)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = true;
        kernel_info.activation_info = act_info;
        kernel_info.has_pad_y = has_pad_y;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        // A failed RHS reshape validation only counts when the RHS is exported to a CL image
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            // Early out: nothing has been configured or allocated yet
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        if(has_pad_y)
        {
            // Add dummy padding into lhs to validate has_pad_y path
            // (dst padding is extended as well, with a different PaddingSize)
            lhs.info()->extend_padding(PaddingSize(2, 0, 2, 0));
            dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
        }

        // The inputs must still be resizable before allocation
        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM: the RHS reshape runs first, then the GEMM consumes lhs and the reshaped RHS
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * The destination shape is (rhs_shape[0], lhs_shape[1] / m_h, m_h, lhs_shape[2]). The bias is created
     * with that shape and its first row of n elements is always copied over the following rows.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true; // Cleared by compute_target() when validation fails
    TensorType _target{};
    SimpleTensor _reference{};
};

/** Fixture for the native GEMM matrix multiplication (no operand is reshaped).
 *
 * NOTE(review): the template parameter list after `template` is not visible in this copy of the
 * file; confirm against the original header.
 */
template class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0 and k0 are copied into the GEMMLHSMatrixInfo; n0 and k0 into the GEMMRHSMatrixInfo.
     * The LHS has shape (k, m, batch_size) and the RHS (n, k, batch_size). The bias has shape
     * (n, 1, 1) when broadcast_bias is set, otherwise (n, m, batch_size).
     */
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ?
1 : batch_size);

        // Target and reference are always computed: this fixture has no validation early-out
        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    /** Fill a tensor through the assets library and overwrite its borders with infinity.
     *
     * The tensor is filled from a uniform real distribution built with bounds -1 and 1. The borders
     * are then filled from a distribution whose two bounds are both infinity.
     *
     * @param[in,out] tensor Tensor or accessor to fill.
     * @param[in]     i      Value forwarded unchanged to library->fill() and library->fill_borders_with_garbage().
     */
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    /** Build the target result by running the GEMM operator directly on lhs, rhs and bias.
     *
     * @param[in] lhs_shape      Shape of the LHS tensor; dimension 0 is used as K and dimension 1 as M.
     * @param[in] rhs_shape      Shape of the RHS tensor; dimension 0 is used as N.
     * @param[in] bias_shape     Shape of the bias tensor.
     * @param[in] lhs_info       Forwarded to the GEMM operator.
     * @param[in] rhs_info       Forwarded to the GEMM operator.
     * @param[in] data_type      Data type used to create lhs, rhs and bias.
     * @param[in] alpha          Forwarded to the GEMM operator.
     * @param[in] beta           Forwarded to the GEMM operator.
     * @param[in] broadcast_bias Stored in GEMMKernelInfo::broadcast_bias.
     * @param[in] act_info       Stored in GEMMKernelInfo::activation_info.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // Padding is added unconditionally here
        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * The bias is created with the destination shape (rhs_shape[0], lhs_shape[1], remaining LHS dimensions).
     * When broadcast_bias is set, its first row of n elements is copied over every following row.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType _target{};
    SimpleTensor _reference{};
};

/** Fixture for the native GEMM matrix multiplication (no operand is reshaped) with a 3D output.
 *
 * setup() builds the LHS/RHS matrix infos and tensor shapes, then always computes both the target
 * and the reference result, stored in _target and _reference.
 *
 * NOTE(review): the template parameter list after `template` is not visible in this copy of the
 * file. The body uses T, TensorType, AccessorType and GEMMOperatorType as if they were template
 * parameters; confirm against the original header.
 */
template class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0 and k0 are copied into the GEMMLHSMatrixInfo; n0 and k0 into the GEMMRHSMatrixInfo.
     * The LHS has shape (k, m_w * m_h, batch_size), the RHS (n, k, batch_size) and the bias (n, 1, 1).
     * m_h is passed on to compute_target() and compute_reference() as the 3D output depth.
     */
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    /** Fill a tensor through the assets library using a uniform real distribution built with bounds -1 and 1.
     *
     * @param[in,out] tensor Tensor or accessor to fill.
     * @param[in]     i      Value forwarded unchanged to library->fill().
     */
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    /** Build the target result by running the GEMM operator directly on lhs, rhs and bias with a 3D output.
     *
     * The kernel info uses depth_output_gemm3d = m_h and always sets broadcast_bias to true.
     *
     * @return The destination tensor after the GEMM run.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = true;
        kernel_info.activation_info = act_info;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        // The inputs must still be resizable before allocation
        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // Padding is added unconditionally here
        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * The destination shape is (rhs_shape[0], lhs_shape[1] / m_h, m_h, lhs_shape[2]). The bias is created
     * with that shape and its first row of n elements is always copied over the following rows.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType _target{};
    SimpleTensor _reference{};
};

template class
GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
{
    // Fixture for the MMUL variant of the GEMM matrix multiplication with only the RHS reshaped.
    // Unlike the sibling fixtures, both the RHS reshape and the GEMM operator are validated before
    // being configured, and compute_reference() itself returns an empty tensor when validation failed.
    // NOTE(review): the template parameter list is not visible in this copy of the file. The body uses
    // T, TensorType, AccessorType, ReshapeRHSOperatorType and GEMMOperatorType as if they were template
    // parameters; confirm against the original header.
public:
    /** Configure the test case and compute target and reference results.
     *
     * m0 and k0 are copied into the GEMMLHSMatrixInfo; n0, k0 and export_to_cl_image into the
     * GEMMRHSMatrixInfo, which additionally uses fixed values interleave = true, transpose = false
     * and h0 = 4. The LHS has shape (k, m, batch_size) and the RHS (n, k, batch_size). The bias has
     * shape (n, 1, 1) when broadcast_bias is set, otherwise (n, m, batch_size).
     */
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.interleave = true;
        rhs_info.transpose = false;
        rhs_info.h0 = 4;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        // compute_reference() checks validate_result itself, so it is called unconditionally
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    /** Fill a tensor through the assets library and overwrite its borders with infinity.
     *
     * The tensor is filled from a uniform real distribution built with bounds -1 and 1. The borders
     * are then filled from a distribution whose two bounds are both infinity.
     *
     * @param[in,out] tensor Tensor or accessor to fill.
     * @param[in]     i      Value forwarded unchanged to library->fill() and library->fill_borders_with_garbage().
     */
    template void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    /** Build the target result: reshape the RHS only, then run the GEMM operator on lhs and the reshaped RHS.
     *
     * validate_result is set first from the RHS reshape validation and then from the GEMM validation.
     * If either fails the function returns `nullptr` immediately; the export_to_cl_image flag is not
     * consulted here. No extra padding is added to the tensors in this fixture.
     *
     * @return The destination tensor after the GEMM run.
     */
    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor(rhs_shape, data_type, 1);
        TensorType bias = create_tensor(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        // GEMM dimensions are read back from the tensor shapes
        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        // Step 1: the RHS reshape must validate
        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);

        // Step 2: the GEMM itself must validate against the configured reshaped RHS
        validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
        if(!validate_result)
        {
            return nullptr;
        }

        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        // The inputs must still be resizable before allocation
        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // After allocation none of the tensors may be resizable any more
        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors (the same indices 0/1/2 are used when filling the reference tensors)
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM: the RHS reshape runs first, then the GEMM consumes lhs and the reshaped RHS
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    /** Compute the reference result with the reference GEMM followed by the reference activation layer.
     *
     * Returns a default-constructed SimpleTensor when validate_result is false. Otherwise the bias is
     * created with the destination shape (rhs_shape[0], lhs_shape[1], remaining LHS dimensions) and,
     * when broadcast_bias is set, its first row of n elements is copied over every following row.
     */
    SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Nothing to compare against when the target configuration was rejected
        if(!validate_result)
            return SimpleTensor();

        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor lhs{ lhs_shape, data_type, 1 };
        SimpleTensor rhs{ rhs_shape, data_type, 1 };
        SimpleTensor bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true; // Cleared by compute_target() when validation fails
    TensorType _target{};
    SimpleTensor _reference{};
};
} // namespace validation
} // namespace test
} // namespace arm_compute
#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H