diff options
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 35 | ||||
-rw-r--r-- | src/runtime/experimental/operators/CpuGemm.cpp | 96 |
2 files changed, 126 insertions, 5 deletions
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index d4d6193fce..baffa8cbb2 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,10 +32,21 @@ namespace arm_compute { +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) OMPScheduler::OMPScheduler() // NOLINT - : _num_threads(omp_get_max_threads()) + : _num_threads(cpu_info().get_cpu_num_excluding_little()), + _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) { } +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ +OMPScheduler::OMPScheduler() // NOLINT + : _num_threads(omp_get_max_threads()), _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) +{ +} +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ unsigned int OMPScheduler::num_threads() const { @@ -45,7 +56,15 @@ unsigned int OMPScheduler::num_threads() const void OMPScheduler::set_num_threads(unsigned int num_threads) { const unsigned int num_cores = omp_get_max_threads(); - _num_threads = (num_threads == 0) ? num_cores : num_threads; +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) + const unsigned int adjusted_num_threads = std::min(_nonlittle_num_cpus, num_threads); + _num_threads = (num_threads == 0) ? num_cores : adjusted_num_threads; +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + _num_threads = (num_threads == 0) ? num_cores : num_threads; +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ } void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) @@ -99,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> } ThreadInfo info; - info.cpu_info = &cpu_info(); + info.cpu_info = &cpu_info(); + +#if !defined(__ANDROID__) + info.num_threads = _num_threads; +#else /* !__ANDROID__ */ info.num_threads = num_threads_to_use; -#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \ +#endif /* __ANDROID__ */ + +#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \ schedule(static, 1) for (unsigned int wid = 0; wid < amount_of_work; ++wid) { diff --git a/src/runtime/experimental/operators/CpuGemm.cpp b/src/runtime/experimental/operators/CpuGemm.cpp new file mode 100644 index 0000000000..9111367d51 --- /dev/null +++ b/src/runtime/experimental/operators/CpuGemm.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/experimental/operators/CpuGemm.h" + +#include "src/cpu/operators/CpuGemm.h" + +namespace arm_compute +{ + +namespace experimental +{ +namespace ops +{ + +struct CpuGemm::Impl +{ + std::unique_ptr<arm_compute::cpu::CpuGemm> cpu_gemm{nullptr}; +}; + +CpuGemm::CpuGemm() : _impl(std::make_unique<Impl>()) +{ + _impl->cpu_gemm = std::make_unique<cpu::CpuGemm>(); +} + +CpuGemm::~CpuGemm() = default; + +void CpuGemm::configure(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info) +{ + _impl->cpu_gemm->configure(a, b, c, d, alpha, beta, gemm_info); +} + +Status CpuGemm::validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + float alpha, + float beta, + const GEMMInfo &gemm_info) +{ + return cpu::CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info); +} + +Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *d, + const GEMMInfo &gemm_info) +{ + return cpu::CpuGemm::has_opt_impl(weight_format, a, b, c, d, gemm_info); +} + +void CpuGemm::run(ITensorPack &tensors) +{ + _impl->cpu_gemm->run(tensors); +} +void CpuGemm::prepare(ITensorPack &constants) +{ + _impl->cpu_gemm->prepare(constants); +} +experimental::MemoryRequirements CpuGemm::workspace() const +{ + return _impl->cpu_gemm->workspace(); +} + +} // namespace ops +} // namespace experimental +} // namespace arm_compute |