From 232c9adefedf62aef591e9cd6316266ee0f29a27 Mon Sep 17 00:00:00 2001
From: Ryo Suzuki
Date: Wed, 19 Jun 2024 09:37:24 +0000
Subject: Provide a wrapper class to expose cpu::CpuGemm

This wrapper allows us to utilize the functionality of CpuGemm
without directly exposing the source code.

Change-Id: I408630f52acd610c912e5c5fa02bfee5f884471e
Signed-off-by: Ryo Suzuki
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11607
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 Android.bp                                         |   1 +
 .../runtime/experimental/operators/CpuGemm.h       | 134 +++++++++++++++++++
 docs/user_guide/release_version_and_change_log.dox |   1 +
 filelist.json                                      |   3 +-
 src/BUILD.bazel                                    |   3 +-
 src/CMakeLists.txt                                 |   1 +
 src/runtime/experimental/operators/CpuGemm.cpp     |  96 ++++++++++++++
 tests/BUILD.bazel                                  |   3 +-
 tests/SConscript                                   |   3 +
 tests/validation/CMakeLists.txt                    |   5 +-
 .../runtime/experimental/operators/CpuGemm.cpp     | 143 +++++++++++++++++++++
 11 files changed, 388 insertions(+), 5 deletions(-)
 create mode 100644 arm_compute/runtime/experimental/operators/CpuGemm.h
 create mode 100644 src/runtime/experimental/operators/CpuGemm.cpp
 create mode 100644 tests/validation/runtime/experimental/operators/CpuGemm.cpp

diff --git a/Android.bp b/Android.bp
index bab5015322..edb494f121 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1012,6 +1012,7 @@ cc_library_static {
         "src/runtime/Tensor.cpp",
         "src/runtime/TensorAllocator.cpp",
         "src/runtime/Utils.cpp",
+        "src/runtime/experimental/operators/CpuGemm.cpp",
         "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
         "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
         "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp",
diff --git a/arm_compute/runtime/experimental/operators/CpuGemm.h b/arm_compute/runtime/experimental/operators/CpuGemm.h
new file mode 100644
index 0000000000..e397cbf006
--- /dev/null
+++ b/arm_compute/runtime/experimental/operators/CpuGemm.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
+#include "arm_compute/runtime/IOperator.h"
+
+/*
+ * A shallow wrapper for arm_compute::cpu::CpuGemm.
+ * Any new features should be added to arm_compute::cpu::CpuGemm, while
+ * arm_compute::experimental::ops::CpuGemm should remain a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace ops
+{
+/** Wrapper class for CpuGemm. For information on the operators,
+ * see "src/cpu/operators/CpuGemm.h"
+*/
+class CpuGemm : IOperator
+{
+public:
+    /** Constructor **/
+    CpuGemm();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CpuGemm(const CpuGemm &) = delete;
+    /** Default move constructor */
+    CpuGemm(CpuGemm &&) = default;
+    /** Default destructor */
+    ~CpuGemm();
+
+    /** Configure operator for a given list of arguments
+     *
+     * Valid data layouts:
+     * - All
+     *
+     * Valid data type configurations:
+     * |a            |b           |c         |d              |
+     * |:------------|:-----------|:---------|:--------------|
+     * |F32          |F32         |F32       |F32            |
+     * |F16          |F16         |F16       |F16            |
+     * |BFLOAT16     |BFLOAT16    |BFLOAT16  |FP32           |
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
+     *
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+     *
+     * @param[in]  a         First input tensor info (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
+     * @param[in]  b         Second input tensor info (Matrix B). Data type supported: same as @p a
+     * @param[in]  c         Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
+     * @param[out] d         Output tensor info. Data type supported: same as @p a
+     * @param[in]  alpha     Weight of the matrix product
+     * @param[in]  beta      Weight of matrix C
+     * @param[in, out] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                       if the reshape of matrix B should happen only for the first run
+     */
+    void configure(const ITensorInfo *a,
+                   const ITensorInfo *b,
+                   const ITensorInfo *c,
+                   ITensorInfo       *d,
+                   float              alpha,
+                   float              beta,
+                   const GEMMInfo    &gemm_info = GEMMInfo());
+
+    /** Static function to check if the given info will lead to a valid configuration of @ref CpuGemm.
+     *
+     * Similar to @ref CpuGemm::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *a,
+                           const ITensorInfo *b,
+                           const ITensorInfo *c,
+                           const ITensorInfo *d,
+                           float              alpha,
+                           float              beta,
+                           const GEMMInfo    &gemm_info = GEMMInfo());
+
+    /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters.
+     *
+     * This method serves the same purpose as @ref
+     * NEGEMMConvolutionLayer::has_opt_impl, with the only caveat that
+     * the value of arm_compute::WeightFormat needs to be passed via the
+     * parameter gemm_info.
+     */
+    static Status has_opt_impl(arm_compute::WeightFormat &weight_format,
+                               const ITensorInfo         *a,
+                               const ITensorInfo         *b,
+                               const ITensorInfo         *c,
+                               const ITensorInfo         *d,
+                               const GEMMInfo            &gemm_info = GEMMInfo());
+
+    void run(ITensorPack &tensors);
+    void prepare(ITensorPack &constants);
+    experimental::MemoryRequirements workspace() const;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index a82520a5b2..8047938425 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -49,6 +49,7 @@ v24.07 Public major release
  - Remove unused "get_default_activation_values" functions.
  - Add SVE fixed format interleaved BF16 DOT kernel.
  - Updates and optimizations to assembly kernels.
+ - Expose CpuGemm functionality using the experimental operators API
 
 v24.06 Public minor release
  - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a
diff --git a/filelist.json b/filelist.json
index fd8277deac..9d24c54b66 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1592,7 +1592,8 @@
         "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
         "src/runtime/NEON/functions/NEGEMM.cpp",
         "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
-        "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
+        "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
+        "src/runtime/experimental/operators/CpuGemm.cpp"
       ],
       "neon": {
         "common": [
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index 8cb84b3533..22521d1744 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -1021,7 +1021,8 @@ filegroup(
         "runtime/SubTensor.cpp",
         "runtime/Tensor.cpp",
         "runtime/TensorAllocator.cpp",
-        "runtime/Utils.cpp"] +
+        "runtime/Utils.cpp",
+        "runtime/experimental/operators/CpuGemm.cpp"] +
     glob(["**/*.h",
           "**/*.hpp",
           "**/*.inl"]),
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9b6a57c3c7..0285245cfb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1013,4 +1013,5 @@ target_sources(
     runtime/Tensor.cpp
     runtime/TensorAllocator.cpp
     runtime/Utils.cpp
+    runtime/experimental/operators/CpuGemm.cpp
 )
\ No newline at end of file
diff --git a/src/runtime/experimental/operators/CpuGemm.cpp b/src/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..9111367d51
--- /dev/null
+++ b/src/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+
+#include "src/cpu/operators/CpuGemm.h"
+
+namespace arm_compute
+{
+
+namespace experimental
+{
+namespace ops
+{
+
+struct CpuGemm::Impl
+{
+    std::unique_ptr<cpu::CpuGemm> cpu_gemm{nullptr};
+};
+
+CpuGemm::CpuGemm() : _impl(std::make_unique<Impl>())
+{
+    _impl->cpu_gemm = std::make_unique<cpu::CpuGemm>();
+}
+
+CpuGemm::~CpuGemm() = default;
+
+void CpuGemm::configure(const ITensorInfo *a,
+                        const ITensorInfo *b,
+                        const ITensorInfo *c,
+                        ITensorInfo       *d,
+                        float              alpha,
+                        float              beta,
+                        const GEMMInfo    &gemm_info)
+{
+    _impl->cpu_gemm->configure(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::validate(const ITensorInfo *a,
+                         const ITensorInfo *b,
+                         const ITensorInfo *c,
+                         const ITensorInfo *d,
+                         float              alpha,
+                         float              beta,
+                         const GEMMInfo    &gemm_info)
+{
+    return cpu::CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &weight_format,
+                             const ITensorInfo         *a,
+                             const ITensorInfo         *b,
+                             const ITensorInfo         *c,
+                             const ITensorInfo         *d,
+                             const GEMMInfo            &gemm_info)
+{
+    return cpu::CpuGemm::has_opt_impl(weight_format, a, b, c, d, gemm_info);
+}
+
+void CpuGemm::run(ITensorPack &tensors)
+{
+    _impl->cpu_gemm->run(tensors);
+}
+void CpuGemm::prepare(ITensorPack &constants)
+{
+    _impl->cpu_gemm->prepare(constants);
+}
+experimental::MemoryRequirements CpuGemm::workspace() const
+{
+    return _impl->cpu_gemm->workspace();
+}
+
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
index 5763938d3c..7085f1facc 100644
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -72,6 +72,7 @@ cc_binary(
         "NEON/*.h",
         "validation/NEON/**/*.cpp",
         "validation/NEON/**/*.h",
+        "validation/runtime/experimental/**/*.cpp",
         "*.cpp",
         "datasets/*.h",
         "instruments/*.h",
diff --git a/tests/SConscript b/tests/SConscript
index fe9d6878e4..9f8bb54dec 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -157,6 +157,9 @@ if env['neon']:
     files_validation += Glob(env['external_tests_dir'] + '/tests/validation/NEON/' + filter_pattern)
     files_validation += Glob('validation/cpu/unit/*.cpp')
 
+    # Add wrapper tests
+    files_validation += Glob('validation/runtime/experimental/*/' + filter_pattern)
+
 extra_link_flags = []
 if env['os'] == 'android':
     test_env.Append(LIBS = ["log"])
diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt
index 448e96c4f9..b71787db60 100644
--- a/tests/validation/CMakeLists.txt
+++ b/tests/validation/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -142,5 +142,6 @@ if(ENABLE_NEON)
         NEON/UNIT/DynamicTensor.cpp
         NEON/UNIT/TensorAllocator.cpp
         NEON/UNIT/MemoryManager.cpp
-        NEON/UNIT/RuntimeContext.cpp)
+        NEON/UNIT/RuntimeContext.cpp
+        runtime/experimental/operators/CpuGemm.cpp)
 endif()
diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..c6df429a4d
--- /dev/null
+++ b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+/*
+ * Tests for arm_compute::experimental::ops::CpuGemm, which is a shallow wrapper for
+ * arm_compute::cpu::CpuGemm. Any future tests of the functionality of cpu::CpuGemm should
+ * go in tests/NEON/GEMM.cpp, given that ops::CpuGemm remains a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using framework::dataset::make;
+
+namespace
+{
+/** CNN data types */
+const auto CNNDataTypes = make("DataType",
+{
+    DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(OPERATORS)
+
+TEST_SUITE(CPUGEMM)
+/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info); + + // telhs are newly created every call of this lambda function + auto lhs = create_tensor(lhs_info); + auto rhs = create_tensor(rhs_info); + auto c = create_tensor(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + arm_compute::experimental::ops::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // CPUGEMM +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1