From 232c9adefedf62aef591e9cd6316266ee0f29a27 Mon Sep 17 00:00:00 2001
From: Ryo Suzuki
Date: Wed, 19 Jun 2024 09:37:24 +0000
Subject: Provide a wrapper class to expose cpu::CpuGemm

This wrapper allows us to utilize the functionality of CpuGemm
without directly exposing the source code.

Change-Id: I408630f52acd610c912e5c5fa02bfee5f884471e
Signed-off-by: Ryo Suzuki
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11607
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 Android.bp                                         |   1 +
 .../runtime/experimental/operators/CpuGemm.h       | 134 +++++++++++++++++++
 docs/user_guide/release_version_and_change_log.dox |   1 +
 filelist.json                                      |   3 +-
 src/BUILD.bazel                                    |   3 +-
 src/CMakeLists.txt                                 |   1 +
 src/runtime/experimental/operators/CpuGemm.cpp     |  96 ++++++++++++++
 tests/BUILD.bazel                                  |   3 +-
 tests/SConscript                                   |   3 +
 tests/validation/CMakeLists.txt                    |   5 +-
 .../runtime/experimental/operators/CpuGemm.cpp     | 143 +++++++++++++++++++++
 11 files changed, 388 insertions(+), 5 deletions(-)
 create mode 100644 arm_compute/runtime/experimental/operators/CpuGemm.h
 create mode 100644 src/runtime/experimental/operators/CpuGemm.cpp
 create mode 100644 tests/validation/runtime/experimental/operators/CpuGemm.cpp

diff --git a/Android.bp b/Android.bp
index bab5015322..edb494f121 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1012,6 +1012,7 @@ cc_library_static {
         "src/runtime/Tensor.cpp",
         "src/runtime/TensorAllocator.cpp",
         "src/runtime/Utils.cpp",
+        "src/runtime/experimental/operators/CpuGemm.cpp",
         "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
         "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
         "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp",
diff --git a/arm_compute/runtime/experimental/operators/CpuGemm.h b/arm_compute/runtime/experimental/operators/CpuGemm.h
new file mode 100644
index 0000000000..e397cbf006
--- /dev/null
+++ b/arm_compute/runtime/experimental/operators/CpuGemm.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
+
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/function_info/GEMMInfo.h"
+#include "arm_compute/runtime/IOperator.h"
+
+/*
+ * A shallow wrapper for arm_compute::cpu::CpuGemm.
+ * Any new features should be added to arm_compute::cpu::CpuGemm, while
+ * arm_compute::experimental::ops::CpuGemm should remain a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace ops
+{
+/** Wrapper class for CpuGemm. For information on the operators,
+ * see "src/cpu/operators/CpuGemm.h"
+*/
+class CpuGemm : IOperator
+{
+public:
+    /** Constructor **/
+    CpuGemm();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CpuGemm(const CpuGemm &) = delete;
+    /** Default move constructor */
+    CpuGemm(CpuGemm &&) = default;
+    /** Default destructor */
+    ~CpuGemm();
+
+    /** Configure operator for a given list of arguments
+     *
+     * Valid data layouts:
+     * - All
+     *
+     * Valid data type configurations:
+     * |a            |b           |c         |d              |
+     * |:------------|:-----------|:---------|:--------------|
+     * |F32          |F32         |F32       |F32            |
+     * |F16          |F16         |F16       |F16            |
+     * |BFLOAT16     |BFLOAT16    |BFLOAT16  |FP32           |
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
+     *
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around
+     *
+     * @param[in]  a         First input tensor info (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
+     * @param[in]  b         Second input tensor info (Matrix B). Data type supported: same as @p a
+     * @param[in]  c         Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
+     * @param[out] d         Output tensor info. Data type supported: same as @p a
+     * @param[in]  alpha     Weight of the matrix product
+     * @param[in]  beta      Weight of matrix C
+     * @param[in, out] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                       if the reshape of matrix B should happen only for the first run
+     */
+    void configure(const ITensorInfo *a,
+                   const ITensorInfo *b,
+                   const ITensorInfo *c,
+                   ITensorInfo       *d,
+                   float              alpha,
+                   float              beta,
+                   const GEMMInfo    &gemm_info = GEMMInfo());
+
+    /** Static function to check if the given info will lead to a valid configuration of @ref CpuGemm.
+     *
+     * Similar to @ref CpuGemm::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *a,
+                           const ITensorInfo *b,
+                           const ITensorInfo *c,
+                           const ITensorInfo *d,
+                           float              alpha,
+                           float              beta,
+                           const GEMMInfo    &gemm_info = GEMMInfo());
+
+    /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters.
+     *
+     * This method serves the same purpose as @ref
+     * NEGEMMConvolutionLayer::has_opt_impl, with the only caveat that
+     * the value of arm_compute::WeightFormat needs to be passed via the
+     * parameter gemm_info.
+     */
+    static Status has_opt_impl(arm_compute::WeightFormat &weight_format,
+                               const ITensorInfo         *a,
+                               const ITensorInfo         *b,
+                               const ITensorInfo         *c,
+                               const ITensorInfo         *d,
+                               const GEMMInfo            &gemm_info = GEMMInfo());
+
+    void run(ITensorPack &tensors);
+    void prepare(ITensorPack &constants);
+    experimental::MemoryRequirements workspace() const;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUGEMM_H
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index a82520a5b2..8047938425 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -49,6 +49,7 @@ v24.07 Public major release
  - Remove unused "get_default_activation_values" functions.
  - Add SVE fixed format interleaved BF16 DOT kernel.
  - Updates and optimizations to assembly kernels.
+ - Expose CpuGemm functionality using the experimental operators API
 
 v24.06 Public minor release
  - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a
diff --git a/filelist.json b/filelist.json
index fd8277deac..9d24c54b66 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1592,7 +1592,8 @@
         "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
         "src/runtime/NEON/functions/NEGEMM.cpp",
         "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
-        "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
+        "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
+        "src/runtime/experimental/operators/CpuGemm.cpp"
       ],
       "neon": {
         "common": [
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index 8cb84b3533..22521d1744 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -1021,7 +1021,8 @@ filegroup(
         "runtime/SubTensor.cpp",
         "runtime/Tensor.cpp",
         "runtime/TensorAllocator.cpp",
-        "runtime/Utils.cpp"] +
+        "runtime/Utils.cpp",
+        "runtime/experimental/operators/CpuGemm.cpp"] +
     glob(["**/*.h",
           "**/*.hpp",
           "**/*.inl"]),
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9b6a57c3c7..0285245cfb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1013,4 +1013,5 @@ target_sources(
     runtime/Tensor.cpp
     runtime/TensorAllocator.cpp
     runtime/Utils.cpp
+    runtime/experimental/operators/CpuGemm.cpp
 )
\ No newline at end of file
diff --git a/src/runtime/experimental/operators/CpuGemm.cpp b/src/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..9111367d51
--- /dev/null
+++ b/src/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+
+#include "src/cpu/operators/CpuGemm.h"
+
+namespace arm_compute
+{
+
+namespace experimental
+{
+namespace ops
+{
+
+struct CpuGemm::Impl
+{
+    std::unique_ptr<cpu::CpuGemm> cpu_gemm{nullptr};
+};
+
+CpuGemm::CpuGemm() : _impl(std::make_unique<Impl>())
+{
+    _impl->cpu_gemm = std::make_unique<cpu::CpuGemm>();
+}
+
+CpuGemm::~CpuGemm() = default;
+
+void CpuGemm::configure(const ITensorInfo *a,
+                        const ITensorInfo *b,
+                        const ITensorInfo *c,
+                        ITensorInfo       *d,
+                        float              alpha,
+                        float              beta,
+                        const GEMMInfo    &gemm_info)
+{
+    _impl->cpu_gemm->configure(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::validate(const ITensorInfo *a,
+                         const ITensorInfo *b,
+                         const ITensorInfo *c,
+                         const ITensorInfo *d,
+                         float              alpha,
+                         float              beta,
+                         const GEMMInfo    &gemm_info)
+{
+    return cpu::CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info);
+}
+
+Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &weight_format,
+                             const ITensorInfo         *a,
+                             const ITensorInfo         *b,
+                             const ITensorInfo         *c,
+                             const ITensorInfo         *d,
+                             const GEMMInfo            &gemm_info)
+{
+    return cpu::CpuGemm::has_opt_impl(weight_format, a, b, c, d, gemm_info);
+}
+
+void CpuGemm::run(ITensorPack &tensors)
+{
+    _impl->cpu_gemm->run(tensors);
+}
+void CpuGemm::prepare(ITensorPack &constants)
+{
+    _impl->cpu_gemm->prepare(constants);
+}
+experimental::MemoryRequirements CpuGemm::workspace() const
+{
+    return _impl->cpu_gemm->workspace();
+}
+
+} // namespace ops
+} // namespace experimental
+} // namespace arm_compute
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
index 5763938d3c..7085f1facc 100644
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -72,6 +72,7 @@ cc_binary(
         "NEON/*.h",
         "validation/NEON/**/*.cpp",
         "validation/NEON/**/*.h",
+        "validation/runtime/experimental/**/*.cpp",
         "*.cpp",
         "datasets/*.h",
         "instruments/*.h",
diff --git a/tests/SConscript b/tests/SConscript
index fe9d6878e4..9f8bb54dec 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -157,6 +157,9 @@ if env['neon']:
     files_validation += Glob(env['external_tests_dir'] + '/tests/validation/NEON/' + filter_pattern)
     files_validation += Glob('validation/cpu/unit/*.cpp')
 
+    # Add wrapper tests
+    files_validation += Glob('validation/runtime/experimental/*/' + filter_pattern)
+
 extra_link_flags = []
 if env['os'] == 'android':
     test_env.Append(LIBS = ["log"])
diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt
index 448e96c4f9..b71787db60 100644
--- a/tests/validation/CMakeLists.txt
+++ b/tests/validation/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Arm Limited.
+# Copyright (c) 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -142,5 +142,6 @@ if(ENABLE_NEON)
         NEON/UNIT/DynamicTensor.cpp
         NEON/UNIT/TensorAllocator.cpp
         NEON/UNIT/MemoryManager.cpp
-        NEON/UNIT/RuntimeContext.cpp)
+        NEON/UNIT/RuntimeContext.cpp
+        runtime/experimental/operators/CpuGemm.cpp)
 endif()
diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
new file mode 100644
index 0000000000..c6df429a4d
--- /dev/null
+++ b/tests/validation/runtime/experimental/operators/CpuGemm.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/experimental/operators/CpuGemm.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+/*
+ * Tests for arm_compute::experimental::ops::CpuGemm, which is a shallow wrapper for
+ * arm_compute::cpu::CpuGemm. Any future tests of the functionality of cpu::CpuGemm should
+ * go in tests/NEON/GEMM.cpp, given that ops::CpuGemm remains a shallow wrapper.
+*/
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using framework::dataset::make;
+
+namespace
+{
+/** CNN data types */
+const auto CNNDataTypes = make("DataType",
+{
+    DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(OPERATORS)
+
+TEST_SUITE(CPUGEMM)
+/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info); + + // telhs are newly created every call of this lambda function + auto lhs = create_tensor(lhs_info); + auto rhs = create_tensor(rhs_info); + auto c = create_tensor(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + arm_compute::experimental::ops::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // CPUGEMM +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1