1 files changed, 285 insertions, 57 deletions
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index c118da66ae..94bedc83e1 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
-#define ARM_COMPUTE_TEST_GEMM_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/TensorShape.h"
@@ -34,6 +34,7 @@
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
 #include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
 #include "tests/validation/reference/GEMM.h"
 
 #include <random>
@@ -44,16 +45,15 @@ namespace test
 {
 namespace validation
 {
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMGenericValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false)
     {
         ARM_COMPUTE_UNUSED(pretranspose);
-        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
-        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
+        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate);
+        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate);
     }
 
 protected:
@@ -80,7 +80,7 @@ protected:
     }
 
     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
-                              DataType data_type)
+                              DataType data_type, bool accumulate=false)
     {
         // Create tensors
         TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1);
@@ -98,8 +98,8 @@ protected:
                        (disable_c) ? nullptr : &c,
                        &dst,
                        alpha, beta,
-                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d
-                                || reinterpret_output_as_3d)));
+                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
+                                || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate));
         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
         ARM_COMPUTE_ASSERT(b.info()->is_resizable());
         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
@@ -121,10 +121,25 @@ protected:
         // Fill tensors
         fill(AccessorType(a), 0);
         fill(AccessorType(b), 1);
+        if (accumulate)
+        {
+            fill(AccessorType(dst), 6);
+        }
         if(!disable_c)
         {
             fill(AccessorType(c), 2);
         }
+        // Run with variable inputs: run once, then refill the inputs with new seeds before the final run below.
+        if(run_twice)
+        {
+            gemm.run();
+            fill(AccessorType(a), 3); // Fill tensors with new seed after run
+            fill(AccessorType(b), 4);
+            if(!disable_c)
+            {
+                fill(AccessorType(c), 5);
+            }
+        }
 
         // Compute GEMM function
         gemm.run();
@@ -133,10 +148,9 @@ protected:
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
-                                      DataType data_type)
+                                      DataType data_type, bool accumulate=false)
     {
         TensorShape shape_a_to_use = shape_a;
-
         if(reinterpret_input_as_3d)
         {
             // Collapse the second and third dimension if the input is 3D
@@ -147,6 +161,7 @@ protected:
         SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
         SimpleTensor<T> b{ shape_b, data_type, 1 };
         SimpleTensor<T> c{ output_shape, data_type, 1 };
+        SimpleTensor<T> dst{ output_shape, data_type, 1 };
 
         // Fill reference
         fill(a, 0);
@@ -159,27 +174,96 @@ protected:
             const int m          = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
             const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
 
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(c.data() + i * n, c.data(), n * sizeof(T));
             }
         }
 
+        /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if pretranspose_a is set to true, then A is assumed to be (B x K x M),
+           so A must already be transposed when it is passed to the fixture. The fixture then transposes A again to make it (B x M x K)
+           so that it can call the reference implementation, which works with (B x M x K) input.
+           Similarly, if pretranspose_b is set to true, then B is assumed to be (B x N x K) and must already be transposed when it is passed to the fixture. */
+
+        // Define transposed shapes
+        TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
+        TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
+
+        // Define transposed tensors
+        SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+        SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+        // pretranspose a if necessary
+        if(pretranspose_a)
+        {
+            transpose_matrix<T>(a, a_transposed);
+        }
+
+        // pretranspose b if necessary
+        if(pretranspose_b)
+        {
+            transpose_matrix<T>(b, b_transposed);
+        }
+
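+        // Run with variable inputs: compute once, then refill the inputs with a second set of seeds (3, 4, 5) before the final computation.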
+        if(run_twice)
+        {
+            reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+            fill((pretranspose_a) ? a_transposed : a, 3);
+            fill((pretranspose_b) ? b_transposed : b, 4);
+            fill(c, 5);
+        }
+
+        // For in-place summation, seed dst with known values so the accumulating GEMM below has existing content to add to
+        if (accumulate)
+        {
+            fill(dst, 6);
+        }
+
         // Setting beta to 0 will effectively disable C for the
         // computation of the reference: alpha * A * B + 0 * C
-        return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta);
+        // Use the transposed tensors when pretranspose_a / pretranspose_b are set, otherwise use the original tensors
+        if (accumulate)
+        {
+            reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst);
+            return dst;
+        }
+        else
+        {
+            return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+        }
     }
 
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    { // Non-accumulating variant: forwards every argument to the generic fixture's setup()
+        GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/); // accumulate is pinned to false here
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+    { // Accumulating variant: same arguments as GEMMValidationFixture::setup(), forwarded to the generic fixture
+        bool accumulate = true; // this fixture always requests the accumulate path of the generic fixture
+        GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate);
+    }
+};
+
 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
 class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
                DataType data_type, GPUTarget gpu_arch)
     {
@@ -255,8 +339,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -285,7 +368,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -303,7 +386,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
 class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -378,8 +460,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -408,7 +489,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -425,7 +506,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -533,8 +613,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -563,7 +642,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -581,7 +660,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
                bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
     {
@@ -688,8 +766,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -718,7 +795,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -735,7 +812,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
                bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
     {
@@ -863,8 +939,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -893,7 +968,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -919,7 +994,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
                bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
     {
@@ -1044,8 +1118,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -1074,7 +1147,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1099,7 +1172,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
                bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
     {
@@ -1216,8 +1288,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -1246,7 +1317,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1265,7 +1336,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
 class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
                bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
     {
@@ -1387,8 +1457,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs_reshaped },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -1417,7 +1486,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1435,7 +1504,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
 class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
                const ActivationLayerInfo &act_info)
     {
@@ -1524,8 +1592,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -1554,7 +1621,7 @@ protected:
 
         if(broadcast_bias)
         {
-            // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we need to simply copy the first into the following "M" ones
             for(int i = 1; i < m * batch_size; i++)
             {
                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1572,7 +1639,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
 class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
 {
 public:
-    template <typename...>
     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
                const ActivationLayerInfo &act_info)
     {
@@ -1660,8 +1726,7 @@ protected:
         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
             { ACL_SRC_1, &rhs },
             { ACL_SRC_2, &bias },
-            { ACL_DST, &dst }
-        });
+            { ACL_DST, &dst } });
         gemm.run(gemm_pack);
 
         return dst;
@@ -1690,7 +1755,7 @@ protected:
         fill(rhs, 1);
         fill(bias, 2);
 
-        // In case of broadcast, we need simply copy the first into the following "M" ones
+        // In case of broadcast, we need to simply copy the first into the following "M" ones
         for(int i = 1; i < m * batch_size; i++)
         {
             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1703,7 +1768,170 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
+class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture // Runs a GEMM operator on a reshaped RHS and builds a reference::gemm + activation result from the same seeds
+{
+public:
+    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
+               float beta, bool broadcast_bias,
+               const ActivationLayerInfo &act_info)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0                 = n0;
+        rhs_info.k0                 = k0;
+        rhs_info.interleave         = true; // interleave, transpose and h0 are fixed by this fixture rather than taken from the test parameters
+        rhs_info.transpose          = false;
+        rhs_info.h0                 = 4;
+        rhs_info.export_to_cl_image = export_to_cl_image;
+
+        // Set the tensor shapes for the LHS, RHS and bias matrices; with broadcast_bias the bias is a single row of n values
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+        const TensorShape bias_shape(n,
+                                     broadcast_bias ? 1 : m,
+                                     broadcast_bias ? 1 : batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); // must run after compute_target(): it reads validate_result
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type; // half uses the 16-bit distribution
+
+        DistributionType distribution{ T(-1.0f), T(1.0f) }; // tensor values are drawn with bounds -1 and 1
+        library->fill(tensor, distribution, i);
+
+        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
+        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+        library->fill_borders_with_garbage(tensor, distribution_inf, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+    {
+        // Create tensors
+        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
+        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
+        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
+        TensorType rhs_reshaped; // NOTE(review): default-constructed; presumably given its shape during configure() - confirm
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1]; // setup() builds lhs_shape as (k, m, batch_size)
+        const unsigned int N = rhs_shape[0]; // setup() builds rhs_shape as (n, k, batch_size)
+        const unsigned int K = lhs_shape[0];
+        GEMMKernelInfo     kernel_info;
+        kernel_info.m                       = M;
+        kernel_info.n                       = N;
+        kernel_info.k                       = K;
+        kernel_info.depth_output_gemm3d     = 0;
+        kernel_info.reinterpret_input_as_3d = false;
+        kernel_info.broadcast_bias          = broadcast_bias;
+        kernel_info.activation_info         = act_info;
+
+        // Create and configure function
+        ReshapeRHSOperatorType reshape_rhs;
+        GEMMOperatorType       gemm;
+
+        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+        if(!validate_result)
+        {
+            return nullptr; // validation failed: skip the run; compute_reference() checks validate_result and returns an empty tensor
+        }
+
+        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+        validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
+        if(!validate_result)
+        {
+            return nullptr; // NOTE(review): relies on TensorType being constructible from nullptr - confirm
+        }
+
+        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+        // Fill tensors (same seeds as compute_reference(): 0 = lhs, 1 = rhs, 2 = bias)
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+        fill(AccessorType(bias), 2);
+
+        // Compute GEMM: reshape the RHS first, then run the GEMM operator on the reshaped RHS
+        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+        reshape_rhs.run(reshape_rhs_pack);
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+            { ACL_SRC_1, &rhs_reshaped },
+            { ACL_SRC_2, &bias },
+            { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
+                                      const ActivationLayerInfo &act_info)
+    {
+        if(!validate_result)
+            return SimpleTensor<T>(); // compute_target() failed validation: return an empty reference
+
+        TensorShape dst_shape = lhs_shape; // copy of lhs_shape: dims 0 and 1 are overwritten below, higher dims are kept
+        dst_shape[0]          = rhs_shape[0];
+        dst_shape[1]          = lhs_shape[1];
+
+        // Create reference
+        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+        SimpleTensor<T> bias{ dst_shape, data_type, 1 }; // allocated at the full output shape, even when the bias is broadcast
+
+        const int n          = rhs_shape[0];
+        const int m          = lhs_shape[1];
+        const int batch_size = lhs_shape[2];
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+        fill(bias, 2);
+
+        if(broadcast_bias)
+        {
+            // In case of broadcast, copy the first row of n values into each of the remaining (m * batch_size - 1) rows
+            for(int i = 1; i < m * batch_size; i++)
+            {
+                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
+            }
+        }
+
+        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
+    }
+
+    bool            validate_result = true; // overwritten by compute_target(); false when either validate() call fails
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H