author     Ryo Suzuki <ryo.suzuki@arm.com>    2024-06-19 09:37:24 +0000
committer  Ryo Suzuki <ryo.suzuki@arm.com>    2024-06-25 11:36:57 +0000
commit     232c9adefedf62aef591e9cd6316266ee0f29a27 (patch)
tree       a95fb64de3aec60a8e8239c431c11d2cc4943928
parent     fc94f4d23abd4bc427b701f54ad85282e9ec7872 (diff)
download   ComputeLibrary-232c9adefedf62aef591e9cd6316266ee0f29a27.tar.gz
Provide a wrapper class to expose cpu::CpuGemm
This wrapper allows us to utilize the functionality of CpuGemm without directly exposing the source code.

Change-Id: I408630f52acd610c912e5c5fa02bfee5f884471e
Signed-off-by: Ryo Suzuki <ryo.suzuki@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11607
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  Android.bp                                                      1
-rw-r--r--  arm_compute/runtime/experimental/operators/CpuGemm.h          134
-rw-r--r--  docs/user_guide/release_version_and_change_log.dox              1
-rw-r--r--  filelist.json                                                   3
-rw-r--r--  src/BUILD.bazel                                                 3
-rw-r--r--  src/CMakeLists.txt                                              1
-rw-r--r--  src/runtime/experimental/operators/CpuGemm.cpp                 96
-rw-r--r--  tests/BUILD.bazel                                               3
-rw-r--r--  tests/SConscript                                                3
-rw-r--r--  tests/validation/CMakeLists.txt                                 5
-rw-r--r--  tests/validation/runtime/experimental/operators/CpuGemm.cpp   143
11 files changed, 388 insertions, 5 deletions
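
The change adds a public, memory-less operator: callers configure it on ITensorInfo descriptors, supply the real tensors at run time through an ITensorPack, and allocate whatever scratch memory workspace() reports. The sketch below is distilled from the validation test added at the end of this patch; the function name and include set are made up for illustration, and it borrows the internal helper manage_workspace() from src/core/helpers/MemoryHelpers.h exactly as the test does, so treat it as a usage sketch rather than part of the change.

#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/helpers/MemoryHelpers.h" // internal helper, used here the same way the new test uses it

using namespace arm_compute;

// Illustrative caller-side flow for the new experimental::ops::CpuGemm wrapper.
void run_wrapped_gemm()
{
    // Describe the operands of d = alpha * a * b + beta * c (shapes taken from the test below).
    TensorInfo a_info(TensorShape(3U, 3U), 1, DataType::F32);
    TensorInfo b_info(TensorShape(4U, 3U), 1, DataType::F32);
    TensorInfo c_info(TensorShape(4U, 3U), 1, DataType::F32);
    TensorInfo d_info(TensorShape(4U, 3U), 1, DataType::F32);

    experimental::ops::CpuGemm gemm;
    gemm.configure(&a_info, &b_info, &c_info, &d_info, 1.f, 1.f, GEMMInfo());

    // Backing tensors are owned by the caller and injected at run time.
    Tensor a, b, c, d;
    a.allocator()->init(a_info);
    b.allocator()->init(b_info);
    c.allocator()->init(c_info);
    d.allocator()->init(d_info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    c.allocator()->allocate();
    d.allocator()->allocate();

    ITensorPack run_pack{{TensorType::ACL_SRC_0, &a}, {TensorType::ACL_SRC_1, &b},
                         {TensorType::ACL_SRC_2, &c}, {TensorType::ACL_DST, &d}};
    ITensorPack prep_pack{{TensorType::ACL_SRC_1, &b}, {TensorType::ACL_SRC_2, &c}};

    // Allocate whatever scratch memory the operator reports it needs.
    MemoryGroup mg;
    auto ws = manage_workspace<Tensor>(gemm.workspace(), mg, run_pack, prep_pack);

    gemm.prepare(prep_pack); // one-off work such as weight reshaping
    gemm.run(run_pack);      // can be called repeatedly with freshly injected tensors
}
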
diff --git a/Android.bp b/Android.bp
index bab5015322..edb494f121 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1012,6 +1012,7 @@ cc_library_static {
"src/runtime/Tensor.cpp",
"src/runtime/TensorAllocator.cpp",
"src/runtime/Utils.cpp",
+ "src/runtime/experimental/operators/CpuGemm.cpp",
"src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
"src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp",
diff --git a/arm_compute/runtime/experimental/operators/CpuGemm.h b/arm_compute/runtime/experimental/operators/CpuGemm.h
new file mode 100644
index 0000000000..e397cbf006
--- /dev/null
+++ b/arm_compute/runtime/experimental/operators/CpuGemm.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
+#include "arm_compute/runtime/IOperator.h"
+
+/*
+ * A shallow wrapper for arm_compute::cpu::CpuGemm.
+ * Any new features should be added to arm_compute::cpu::CpuGemm and
+ * arm_compute::experimental::ops::CpuGemm should remain a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace ops
+{
+/** Wrapper class for CpuGemm. For information on the operators,
+ * see "src/cpu/operators/CpuGemm.h"
+*/
+class CpuGemm : IOperator
+{
+public:
+ /** Constructor **/
+ CpuGemm();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuGemm(const CpuGemm &) = delete;
+ /** Default move constructor */
+ CpuGemm(CpuGemm &&) = default;
+ /** Default destructor */
+ ~CpuGemm();
+
+ /** Configure operator for a given list of arguments
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |a |b |c |d |
+ * |:------------|:-----------|:---------|:--------------|
+ * |F32 |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |FP32 |
+ *
+ * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+ * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
+ *
+ * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+ *
+ * @param[in] a First input tensor info (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
+ * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
+ * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
+ * @param[out] d Output tensor info. Data type supported: same as @p a
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of matrix C
+ * @param[in, out] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+ * if the reshape of matrix B should happen only for the first run
+ */
+ void configure(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ ITensorInfo *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuGemm.
+ *
+ * Similar to @ref CpuGemm::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+ /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters.
+ *
+ * This method has the same use as @ref
+ * NEGEMMConvolutionLayer::has_opt_impl, with the only caveat that
+ * the value of arm_compute::WeightFormat needs to be passed via the
+ * parameter gemm_info.
+ */
+ static Status has_opt_impl(arm_compute::WeightFormat &weight_format,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *d,
+ const GEMMInfo &gemm_info = GEMMInfo());
+
+ void run(ITensorPack &tensors);
+ void prepare(ITensorPack &constants);
+ experimental::MemoryRequirements workspace() const;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
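
As documented in the header above, the operator computes d = alpha * A * B + beta * C. For readers who want a concrete reference point, a naive scalar implementation of that formula (purely illustrative, not how the library computes it) looks like this:

// Naive reference GEMM: d[m][n] = alpha * sum_k a[m][k] * b[k][n] + beta * c[m][n]
void reference_gemm(const float *a, const float *b, const float *c, float *d,
                    int M, int N, int K, float alpha, float beta)
{
    for (int m = 0; m < M; ++m)
    {
        for (int n = 0; n < N; ++n)
        {
            float acc = 0.f;
            for (int k = 0; k < K; ++k)
            {
                acc += a[m * K + k] * b[k * N + n];
            }
            // c may be null, mirroring the wrapper's optional third operand.
            d[m * N + n] = alpha * acc + (c != nullptr ? beta * c[m * N + n] : 0.f);
        }
    }
}
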
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index a82520a5b2..8047938425 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -49,6 +49,7 @@ v24.07 Public major release
- Remove unused "get_default_activation_values" functions.
- Add SVE fixed format interleaved BF16 DOT kernel.
- Updates and optimizations to assembly kernels.
+ - Expose CpuGemm functionality using the experimental operators API
v24.06 Public minor release
- Enable FP16 in multiple Neon™ kernels for multi_isa + v8a
diff --git a/filelist.json b/filelist.json
index fd8277deac..9d24c54b66 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1592,7 +1592,8 @@
"src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
"src/runtime/NEON/functions/NEGEMM.cpp",
"src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
- "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
+ "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
+ "src/runtime/experimental/operators/CpuGemm.cpp"
],
"neon": {
"common": [
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index 8cb84b3533..22521d1744 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -1021,7 +1021,8 @@ filegroup(
"runtime/SubTensor.cpp",
"runtime/Tensor.cpp",
"runtime/TensorAllocator.cpp",
- "runtime/Utils.cpp"] +
+ "runtime/Utils.cpp",
+ "runtime/experimental/operators/CpuGemm.cpp"] +
glob(["**/*.h",
"**/*.hpp",
"**/*.inl"]),
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9b6a57c3c7..0285245cfb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1013,4 +1013,5 @@ target_sources(
runtime/Tensor.cpp
runtime/TensorAllocator.cpp
runtime/Utils.cpp
+ runtime/experimental/operators/CpuGemm.cpp
) \ No newline at end of file
diff --git a/src/runtime/experimental/operators/CpuGemm.cpp b/src/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..9111367d51
--- /dev/null
+++ b/src/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+
+#include "src/cpu/operators/CpuGemm.h"
+
+namespace arm_compute
+{
+
+namespace experimental
+{
+namespace ops
+{
+
+struct CpuGemm::Impl
+{
+ std::unique_ptr<arm_compute::cpu::CpuGemm> cpu_gemm{nullptr};
+};
+
+CpuGemm::CpuGemm() : _impl(std::make_unique<Impl>())
+{
+ _impl->cpu_gemm = std::make_unique<cpu::CpuGemm>();
+}
+
+CpuGemm::~CpuGemm() = default;
+
+void CpuGemm::configure(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ ITensorInfo *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
+{
+ _impl->cpu_gemm->configure(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *d,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
+{
+ return cpu::CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &weight_format,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *d,
+ const GEMMInfo &gemm_info)
+{
+ return cpu::CpuGemm::has_opt_impl(weight_format, a, b, c, d, gemm_info);
+}
+
+void CpuGemm::run(ITensorPack &tensors)
+{
+ _impl->cpu_gemm->run(tensors);
+}
+void CpuGemm::prepare(ITensorPack &constants)
+{
+ _impl->cpu_gemm->prepare(constants);
+}
+experimental::MemoryRequirements CpuGemm::workspace() const
+{
+ return _impl->cpu_gemm->workspace();
+}
+
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
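
A note on the design choice visible in this file: the wrapper uses the pimpl idiom, so the public header only forward-declares struct Impl and the concrete cpu::CpuGemm is named solely in this translation unit. The out-of-line defaulted destructor above is what lets std::unique_ptr<Impl> be destroyed where Impl is complete. A minimal generic sketch of the same pattern (names are illustrative, not part of the library):

// widget.h -- public header: Impl is only forward-declared.
#include <memory>
class Widget
{
public:
    Widget();
    ~Widget(); // declared here, defined where Impl is complete
    void work();
private:
    struct Impl;
    std::unique_ptr<Impl> _impl;
};

// widget.cpp -- private implementation details live here.
struct Widget::Impl
{
    int state{0};
};
Widget::Widget() : _impl(std::make_unique<Impl>()) {}
Widget::~Widget() = default; // Impl is complete here, so unique_ptr can delete it
void Widget::work() { ++_impl->state; }
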
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
index 5763938d3c..7085f1facc 100644
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
@@ -72,6 +72,7 @@ cc_binary(
"NEON/*.h",
"validation/NEON/**/*.cpp",
"validation/NEON/**/*.h",
+ "validation/runtime/experimental/**/*.cpp",
"*.cpp",
"datasets/*.h",
"instruments/*.h",
diff --git a/tests/SConscript b/tests/SConscript
index fe9d6878e4..9f8bb54dec 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -157,6 +157,9 @@ if env['neon']:
files_validation += Glob(env['external_tests_dir'] + '/tests/validation/NEON/' + filter_pattern)
files_validation += Glob('validation/cpu/unit/*.cpp')
+ # Add wrapper tests
+ files_validation += Glob('validation/runtime/experimental/*/' + filter_pattern)
+
extra_link_flags = []
if env['os'] == 'android':
test_env.Append(LIBS = ["log"])
diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt
index 448e96c4f9..b71787db60 100644
--- a/tests/validation/CMakeLists.txt
+++ b/tests/validation/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
@@ -142,5 +142,6 @@ if(ENABLE_NEON)
NEON/UNIT/DynamicTensor.cpp
NEON/UNIT/TensorAllocator.cpp
NEON/UNIT/MemoryManager.cpp
- NEON/UNIT/RuntimeContext.cpp)
+ NEON/UNIT/RuntimeContext.cpp
+ runtime/experimental/operators/CpuGemm.cpp)
endif()
diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..c6df429a4d
--- /dev/null
+++ b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+/*
+ * Tests for arm_compute::experimental::ops::CpuGemm which is a shallow wrapper for
+ * arm_compute::cpu::CpuGemm. Any future tests of the functionality of cpu::CpuGemm should
+ * be added to tests/NEON/GEMM.cpp, given that ops::CpuGemm remains a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using framework::dataset::make;
+
+namespace
+{
+/** CNN data types */
+const auto CNNDataTypes = make("DataType",
+{
+ DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(OPERATORS)
+
+TEST_SUITE(CPUGEMM)
+/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL)
+{
+ auto gemm = std::make_unique<arm_compute::experimental::ops::CpuGemm>();
+ const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
+ const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto gemm_info = GEMMInfo{};
+ gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info);
+
+ // The lhs, rhs and c tensors are created and allocated once; only the destination is re-created inside the lambda below
+ auto lhs = create_tensor<Tensor>(lhs_info);
+ auto rhs = create_tensor<Tensor>(rhs_info);
+ auto c = create_tensor<Tensor>(c_info);
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ c.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(lhs), 1.f);
+ library->fill_tensor_value(Accessor(rhs), 2.f);
+ library->fill_tensor_value(Accessor(c), 3.f);
+ // This operator is configured once and captured by this lambda.
+ gemm->prepare(prep_pack);
+ gemm->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine(
+ zip(make("In0",{ TensorShape(21U, 13U) }),
+ make("In1", { TensorShape(33U, 21U) }),
+ make("Dst", { TensorShape(33U, 13U) })),
+ zip(
+ make("alpha", { 1.0, 100.0, 1.0, 1.0 }),
+ make("beta", { 0.0, 0.0, 1.0, 1.0 }),
+ make("is_c_null", { false, false, false, true }),
+ make("Expected", { true, false, false, true }))),
+ shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected)
+{
+ /* Accumulation test for GEMM kernels */
+ // Create tensors
+ TensorInfo in_a(shape_a, 1, DataType::F32);
+ TensorInfo in_b(shape_b, 1, DataType::F32);
+ TensorInfo in_c(shape_dst, 1, DataType::F32);
+ TensorInfo dst(shape_dst, 1, DataType::F32);
+
+ GEMMInfo gemm_info = GEMMInfo();
+ gemm_info.set_accumulate(true);
+
+ // Validate accumulation
+ arm_compute::experimental::ops::CpuGemm gemm;
+ Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info);
+ ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE_END() // CPUGEMM
+TEST_SUITE_END() // OPERATORS
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
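
A side note on the memory-injection test above: it only checks that the two runs agree with each other, not the absolute result. With the chosen fills (a = 1, b = 2, c = 3), alpha = beta = 1 and an inner dimension K = 3 (assuming ACL's width-first TensorShape convention, so a is 3x3, b is 3x4 and d is 3x4), every element of d should come out as 1 * (3 * 1 * 2) + 1 * 3 = 9. A hypothetical stricter check, written to slot in right after result_1 is computed in that test, could look like:

// Hypothetical addition to OpsCpuGemmMemoryInjection: also check the absolute value.
// expected = alpha * (K * 1.f * 2.f) + beta * 3.f = 1 * 6 + 3 = 9, under the shape assumption above.
constexpr float expected_value = 9.f;
for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
{
    ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == expected_value, framework::LogLevel::ERRORS);
}
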