Diffstat (limited to 'tests/validation/fixtures/GEMMFixture.h')
 tests/validation/fixtures/GEMMFixture.h | 342 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 285 insertions(+), 57 deletions(-)
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index c118da66ae..94bedc83e1 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
-#define ARM_COMPUTE_TEST_GEMM_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorShape.h"
@@ -34,6 +34,7 @@
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/GEMM.h"
#include <random>
@@ -44,16 +45,15 @@ namespace test
{
namespace validation
{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMGenericValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false)
{
ARM_COMPUTE_UNUSED(pretranspose);
- _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
- _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
+ _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate);
+ _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate);
}
protected:
@@ -80,7 +80,7 @@ protected:
}
TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
// Create tensors
TensorType a = create_tensor<TensorType>(shape_a, data_type, 1);
@@ -98,8 +98,8 @@ protected:
(disable_c) ? nullptr : &c,
&dst,
alpha, beta,
- GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d
- || reinterpret_output_as_3d)));
+ GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
+ || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate));
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
ARM_COMPUTE_ASSERT(c.info()->is_resizable());
@@ -121,10 +121,25 @@ protected:
// Fill tensors
fill(AccessorType(a), 0);
fill(AccessorType(b), 1);
+ if (accumulate)
+ {
+ fill(AccessorType(dst), 6);
+ }
if(!disable_c)
{
fill(AccessorType(c), 2);
}
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ gemm.run();
+ fill(AccessorType(a), 3); // Fill tensors with new seed after run
+ fill(AccessorType(b), 4);
+ if(!disable_c)
+ {
+ fill(AccessorType(c), 5);
+ }
+ }
// Compute GEMM function
gemm.run();
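
Note on the run_twice path above: the function is configured once, run, and then run a second time after the inputs are re-filled with new seeds; only the second output is compared against the reference, which checks that the operator does not hold on to stale data from its first execution. A minimal standalone sketch of the kind of bug this pattern catches (illustrative code only, not ACL API):

// Illustrative sketch only (not ACL code): a functor that wrongly snapshots its
// input on the first run keeps producing stale results afterwards. Running the
// configured object twice with freshly filled inputs and validating only the
// second output exposes that class of bug.
#include <cstddef>
#include <cstdio>
#include <vector>

struct BuggyScale
{
    void configure(float scale) { scale_ = scale; }

    std::vector<float> run(const std::vector<float> &in)
    {
        if (cached_.empty())
        {
            cached_ = in; // bug: the first input is captured and reused forever
        }
        std::vector<float> out(cached_.size());
        for (std::size_t i = 0; i < cached_.size(); ++i)
        {
            out[i] = cached_[i] * scale_;
        }
        return out;
    }

    float              scale_{1.0f};
    std::vector<float> cached_{};
};

int main()
{
    BuggyScale op;
    op.configure(2.0f);

    op.run({1.0f, 2.0f, 3.0f});            // first run, analogous to the extra gemm.run()
    auto out = op.run({4.0f, 5.0f, 6.0f});  // second run with re-filled inputs

    const std::vector<float> expected{8.0f, 10.0f, 12.0f}; // reference for the second fill
    std::printf("second run %s the reference\n", out == expected ? "matches" : "does NOT match");
    return 0;
}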
@@ -133,10 +148,9 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
TensorShape shape_a_to_use = shape_a;
-
if(reinterpret_input_as_3d)
{
// Collapse the second and third dimension if the input is 3D
@@ -147,6 +161,7 @@ protected:
SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
SimpleTensor<T> b{ shape_b, data_type, 1 };
SimpleTensor<T> c{ output_shape, data_type, 1 };
+ SimpleTensor<T> dst{ output_shape, data_type, 1 };
// Fill reference
fill(a, 0);
@@ -159,27 +174,96 @@ protected:
const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(c.data() + i * n, c.data(), n * sizeof(T));
}
}
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if pretranspose_a is set to true, A is assumed to be (B x K x M),
+ so A must be pre-transposed before being passed to the fixture. The fixture then transposes A back to (B x M x K)
+ so that the reference implementation, which expects a (B x M x K) input, can be used.
+ Similarly, if pretranspose_b is set to true, B is assumed to be (B x N x K) and must be pre-transposed before being passed to the fixture. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
+ TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+ SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+ // pretranspose a if necessary
+ if(pretranspose_a)
+ {
+ transpose_matrix<T>(a, a_transposed);
+ }
+
+ // pretranspose b if necessary
+ if(pretranspose_b)
+ {
+ transpose_matrix<T>(b, b_transposed);
+ }
+
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ fill((pretranspose_a) ? a_transposed : a, 3);
+ fill((pretranspose_b) ? b_transposed : b, 4);
+ fill(c, 5);
+ }
+
+ // Do in place summation
+ if (accumulate)
+ {
+ fill(dst, 6);
+ }
+
// Setting beta to 0 will effectively disable C for the
// computation of the reference: alpha * A * B + 0 * C
- return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta);
+ // Use transposed tensors if boolean enabled else use original tensors
+ if (accumulate)
+ {
+ reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst);
+ return dst;
+ }
+ else
+ {
+ return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ }
}
TensorType _target{};
SimpleTensor<T> _reference{};
};
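
Taken together, the compute_reference() changes above model two new behaviours: inputs that arrive pre-transposed are transposed back before the reference GEMM is called, and, when accumulate is set, the GEMM result is added onto the existing destination contents (the newly included ElementwiseOperations.h suggests the reference gemm_accumulate combines the two element-wise). A minimal standalone sketch of that arithmetic, assuming plain row-major single-batch matrices and illustrative helper names rather than ACL's reference API:

// Illustrative sketch only: naive row-major GEMM with optional accumulation,
// i.e. dst = alpha * A * B + beta * C (+ previous dst contents when accumulate is true).
#include <cstddef>
#include <vector>

using Matrix = std::vector<float>; // row-major, size = rows * cols

void gemm_ref(const Matrix &a, const Matrix &b, const Matrix &c,
              float alpha, float beta, Matrix &dst,
              std::size_t m, std::size_t n, std::size_t k, bool accumulate)
{
    for (std::size_t i = 0; i < m; ++i)
    {
        for (std::size_t j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for (std::size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            const float gemm_out = alpha * acc + beta * c[i * n + j];
            // When accumulating, the previously filled destination (seed 6 in the
            // fixture) is kept and the GEMM output is added onto it.
            dst[i * n + j] = accumulate ? dst[i * n + j] + gemm_out : gemm_out;
        }
    }
}

// Row-major transpose, conceptually what transpose_matrix() is used for when the
// fixture receives pre-transposed A or B and has to recover the reference layout.
Matrix transpose(const Matrix &src, std::size_t rows, std::size_t cols)
{
    Matrix dst(src.size());
    for (std::size_t r = 0; r < rows; ++r)
        for (std::size_t c = 0; c < cols; ++c)
            dst[c * rows + r] = src[r * cols + c];
    return dst;
}

int main()
{
    // 2x2 example: dst is pre-filled with 1s, so accumulate adds onto them.
    const Matrix a{1, 2, 3, 4}, b{5, 6, 7, 8}, c{0, 0, 0, 0};
    Matrix dst(4, 1.f);
    gemm_ref(a, b, c, /*alpha=*/1.f, /*beta=*/0.f, dst, 2, 2, 2, /*accumulate=*/true);
    // dst is now {1+19, 1+22, 1+43, 1+50}.
    return 0;
}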
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ bool accumulate = true;
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate);
+ }
+};
+
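
The split above keeps the full parameter list (including accumulate) in GEMMGenericValidationFixture, while the two thin wrappers pin accumulate to false and true respectively, so existing test suites keep the original setup() signature. A small standalone sketch of that wrapper pattern (illustrative names only, not this patch's classes):

// Standalone sketch of the wrapper pattern: a generic fixture exposes the full
// parameter list, and thin derived classes pin the extra flag so call sites
// keep their original setup() signature.
#include <cstdio>

class GenericFixture
{
public:
    void setup(int shape, bool accumulate) { std::printf("shape=%d accumulate=%d\n", shape, accumulate); }
};

class PlainFixture : protected GenericFixture
{
public:
    void setup(int shape) { GenericFixture::setup(shape, /*accumulate=*/false); }
};

class AccumulateFixture : protected GenericFixture
{
public:
    void setup(int shape) { GenericFixture::setup(shape, /*accumulate=*/true); }
};

int main()
{
    PlainFixture{}.setup(32);      // old-style call, accumulate stays off
    AccumulateFixture{}.setup(32); // same signature, accumulate forced on
    return 0;
}

In a backend test suite this would presumably be consumed through an alias in the existing style, e.g. something along the lines of GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, T> for Neon, registered through the usual FIXTURE_DATA_TEST_CASE machinery; the exact dataset wiring is outside the scope of this file.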
template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
DataType data_type, GPUTarget gpu_arch)
{
@@ -255,8 +339,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -285,7 +368,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -303,7 +386,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -378,8 +460,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -408,7 +489,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -425,7 +506,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -533,8 +613,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -563,7 +642,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -581,7 +660,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -688,8 +766,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -718,7 +795,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -735,7 +812,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
@@ -863,8 +939,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -893,7 +968,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -919,7 +994,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
@@ -1044,8 +1118,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1074,7 +1147,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1099,7 +1172,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
{
@@ -1216,8 +1288,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1246,7 +1317,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1265,7 +1336,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
{
@@ -1387,8 +1457,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1417,7 +1486,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1435,7 +1504,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
const ActivationLayerInfo &act_info)
{
@@ -1524,8 +1592,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1554,7 +1621,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1572,7 +1639,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
const ActivationLayerInfo &act_info)
{
@@ -1660,8 +1726,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1690,7 +1755,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1703,7 +1768,170 @@ protected:
SimpleTensor<T> _reference{};
};
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
+class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
+{
+public:
+ void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
+ float beta, bool broadcast_bias,
+ const ActivationLayerInfo &act_info)
+ {
+ GEMMLHSMatrixInfo lhs_info;
+ lhs_info.m0 = m0;
+ lhs_info.k0 = k0;
+
+ GEMMRHSMatrixInfo rhs_info;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.interleave = true;
+ rhs_info.transpose = false;
+ rhs_info.h0 = 4;
+ rhs_info.export_to_cl_image = export_to_cl_image;
+
+ // Set the tensor shapes for LHS and RHS matrices
+ const TensorShape lhs_shape(k, m, batch_size);
+ const TensorShape rhs_shape(n, k, batch_size);
+ const TensorShape bias_shape(n,
+ broadcast_bias ? 1 : m,
+ broadcast_bias ? 1 : batch_size);
+
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ }
+
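
One convention worth spelling out for readers of the shapes above: TensorShape lists dimensions innermost-first, so lhs_shape(k, m, batch_size) describes an M x K matrix per batch, which is why compute_target() below reads N from rhs_shape[0], M from lhs_shape[1] and K from lhs_shape[0]. A tiny illustrative mapping (the struct below is not ACL code):

// Illustrative only (not ACL code): dimension 0 is the innermost/x axis, so an
// M x K matrix replicated over B batches is described as (x=K, y=M, z=B).
#include <array>
#include <cstdio>

using Shape = std::array<unsigned int, 3>; // {x, y, z}

Shape make_lhs_shape(unsigned int k, unsigned int m, unsigned int batch)
{
    return {k, m, batch}; // mirrors TensorShape(k, m, batch_size) above
}

int main()
{
    const Shape lhs = make_lhs_shape(/*k=*/4, /*m=*/8, /*batch=*/2);
    std::printf("K=%u M=%u batches=%u\n", lhs[0], lhs[1], lhs[2]);
    return 0;
}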
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
+ library->fill(tensor, distribution, i);
+
+ // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+ library->fill_borders_with_garbage(tensor, distribution_inf, i);
+ }
+
+ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+ {
+ // Create tensors
+ TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+ TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+ TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
+ TensorType rhs_reshaped;
+ TensorType dst;
+
+ const unsigned int M = lhs_shape[1];
+ const unsigned int N = rhs_shape[0];
+ const unsigned int K = lhs_shape[0];
+ GEMMKernelInfo kernel_info;
+ kernel_info.m = M;
+ kernel_info.n = N;
+ kernel_info.k = K;
+ kernel_info.depth_output_gemm3d = 0;
+ kernel_info.reinterpret_input_as_3d = false;
+ kernel_info.broadcast_bias = broadcast_bias;
+ kernel_info.activation_info = act_info;
+
+ // Create and configure function
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+ validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // Allocate tensors
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ rhs_reshaped.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(lhs), 0);
+ fill(AccessorType(rhs), 1);
+ fill(AccessorType(bias), 2);
+
+ // Compute GEMM
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
+ const ActivationLayerInfo &act_info)
+ {
+ if(!validate_result)
+ return SimpleTensor<T>();
+
+ TensorShape dst_shape = lhs_shape;
+ dst_shape[0] = rhs_shape[0];
+ dst_shape[1] = lhs_shape[1];
+
+ // Create reference
+ SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+ SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+ SimpleTensor<T> bias{ dst_shape, data_type, 1 };
+
+ const int n = rhs_shape[0];
+ const int m = lhs_shape[1];
+ const int batch_size = lhs_shape[2];
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+ fill(bias, 2);
+
+ if(broadcast_bias)
+ {
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
+ for(int i = 1; i < m * batch_size; i++)
+ {
+ memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
+ }
+ }
+
+ return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
+ }
+
+ bool validate_result = true;
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
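
A note on this fixture's fill() helper: the tensor borders are deliberately filled with infinity so that, if a kernel ever reads out-of-bounds or padding elements, products such as inf * 0 turn into NaN, poison the output and make the validation fail. A standalone illustration of that detection trick (not ACL code):

// Illustrative only: multiplying an "unused" infinite border element by zero
// yields NaN, so any kernel that accidentally includes border elements in its
// dot product poisons the result and is caught by the output comparison.
#include <cmath>
#include <cstdio>
#include <limits>

int main()
{
    const float inf  = std::numeric_limits<float>::infinity();
    const float good = 1.0f * 2.0f;        // in-bounds element times a weight
    const float bad  = good + inf * 0.0f;  // border element wrongly read: inf * 0 == NaN

    std::printf("good=%f bad_is_nan=%d\n", good, std::isnan(bad) ? 1 : 0);
    return 0;
}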
+
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H