diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/cpuinfo/CpuInfo.cpp | 50 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuInfo.h | 9 | ||||
-rw-r--r-- | src/core/CPP/CPPTypes.cpp | 14 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.cpp | 27 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.h | 18 | ||||
-rw-r--r-- | src/cpu/kernels/CpuActivationKernel.cpp | 18 | ||||
-rw-r--r-- | src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp | 6 | ||||
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 35 |
13 files changed, 161 insertions(+), 46 deletions(-)
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp index 809ab3e2c3..d46d8d7773 100644 --- a/src/common/cpuinfo/CpuInfo.cpp +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -29,6 +29,7 @@ #include "support/StringSupport.h" #include "support/ToolchainSupport.h" +#include <map> #include <sstream> #if !defined(BARE_METAL) @@ -269,6 +270,46 @@ int get_max_cpus() } return max_cpus; } +#if defined(__ANDROID__) +std::vector<uint32_t> get_cpu_capacities() +{ + std::vector<uint32_t> cpu_capacities; + for (int i = 0; i < get_max_cpus(); ++i) + { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity"; + std::ifstream file(str.str(), std::ios::in); + if (file.is_open()) + { + std::string line; + if (bool(getline(file, line))) + { + cpu_capacities.emplace_back(support::cpp11::stoul(line)); + } + } + } + + return cpu_capacities; +} + +uint32_t not_little_num_cpus_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end()); + uint32_t max_capacity = *result; + uint32_t threshold = max_capacity / 2; + for (unsigned int i = 0; i < cpus_all.size(); i++) + { + if (!(cpus_all[i] < threshold)) + { + cpus_not_little.emplace_back(cpus_all[i]); + } + } + return cpus_not_little.size(); +} +#endif /* defined(__ANDROID__) */ #elif defined(__aarch64__) && \ defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ /** Query features through sysctlbyname @@ -402,6 +443,15 @@ uint32_t CpuInfo::num_cpus() const return _cpus.size(); } +uint32_t CpuInfo::not_little_num_cpus() const +{ +#if defined(__ANDROID__) + return not_little_num_cpus_internal(); +#else /* defined(__ANDROID__) */ + return num_cpus(); +#endif /* defined(__ANDROID__) */ +} + uint32_t num_threads_hint() { unsigned int num_threads_hint = 1; diff --git 
a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h index 953e4883c3..78d11e9610 100644 --- a/src/common/cpuinfo/CpuInfo.h +++ b/src/common/cpuinfo/CpuInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_COMMON_CPUINFO_H -#define SRC_COMMON_CPUINFO_H +#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H +#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H #include "src/common/cpuinfo/CpuIsaInfo.h" #include "src/common/cpuinfo/CpuModel.h" @@ -120,6 +120,7 @@ public: CpuModel cpu_model(uint32_t cpuid) const; CpuModel cpu_model() const; uint32_t num_cpus() const; + uint32_t not_little_num_cpus() const; private: CpuIsaInfo _isa{}; @@ -135,4 +136,4 @@ private: uint32_t num_threads_hint(); } // namespace cpuinfo } // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_H */ +#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index f6761f27b0..ee39210fa5 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -140,10 +140,20 @@ unsigned int CPUInfo::get_L2_cache_size() const unsigned long CPUInfo::get_sme2_vector_length() const { #ifdef ARM_COMPUTE_ENABLE_SME2 - return arm_gemm::utils::sme::get_vector_length<int8_t>(); + if (this->has_sme2()) + return arm_gemm::utils::sme::get_vector_length<int8_t>(); + else + return 0; #else // ARM_COMPUTE_ENABLE_SME2 return 0; #endif // ARM_COMPUTE_ENABLE_SME2 } - +unsigned int CPUInfo::get_cpu_num_excluding_little() const +{ +#if defined(__ANDROID__) + return _impl->info.not_little_num_cpus(); +#else /* defined(__ANDROID__) */ + return get_cpu_num(); +#endif /* defined(__ANDROID__) */ +} } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 
717fd11485..153c36052a 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = { REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)}, #endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #if defined(ARM_COMPUTE_ENABLE_NEON) -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)}, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ {"neon_fp32_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)}, diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index cb869838e2..694def1a3a 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = { {"fp32_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_NEON) {"qu16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; }, diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index 549319e49f..e23e3d020f 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = { {"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; }, REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)}, #endif //defined(ARM_COMPUTE_ENABLE_NEON) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)}, }; diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 0a1780f6ee..5883731088 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -70,10 +70,10 @@ struct InstanceNormKernel static const InstanceNormKernel available_kernels[] = { {"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 }; /** Micro-kernel selector diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 451031d696..cfe4ac9a4c 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -60,10 +60,10 @@ struct MeanStdDevNormKernel static const std::vector<MeanStdDevNormKernel> available_kernels = { {"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)}, }; diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp index 06e35eed8c..2effffbe92 100644 --- a/src/core/helpers/LUTManager.cpp +++ b/src/core/helpers/LUTManager.cpp @@ -30,17 +30,38 @@ namespace arm_compute namespace { -void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut) +float16_t activation(float16_t x, const LUTInfo &info) +{ + float16_t out = 0.f; + switch (info.act) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + out = 1.f / (1.f + std::exp(-x)); + break; + case ActivationLayerInfo::ActivationFunction::TANH: + { + out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x)); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table"); + break; + } + return out; +} + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info) { union Element { uint16_t i = 0; float16_t fp; } item; + // Fill lut by iterating over all 16 bit values using the union. 
while (true) { - (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp)); + (*lut)[item.i] = activation(item.fp, info); if (item.i == 65535) break; item.i++; @@ -62,7 +83,7 @@ std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table // Not found, or pointer not valid // We do not use make_shared to prevent the weak_ptr keeping the control block alive std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); - init_lut_fp16(ptr.get()); + init_lut_fp16(ptr.get(), info); map_fp16[info] = ptr; return ptr; } diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h index 4e13ead7e3..f3f4bf2832 100644 --- a/src/core/helpers/LUTManager.h +++ b/src/core/helpers/LUTManager.h @@ -38,19 +38,23 @@ namespace arm_compute struct LUTInfo { ActivationLayerInfo::ActivationFunction act; + float alpha; + float beta; DataType dt; - QuantizationInfo qinfo; + UniformQuantizationInfo qinfo; + // Operators enable use of map with Lutinfo as key friend bool operator<(const LUTInfo &l, const LUTInfo &r) { - return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) || - ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) || - ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) && - (l.qinfo.offset() < l.qinfo.offset())); + const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset); + const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset); + + return l_tup < r_tup; } - bool operator==(const LUTInfo &l) + bool operator==(const LUTInfo &l) const { - return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo; + return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt && + this->qinfo == l.qinfo; } }; diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index 7cfa39b286..4253027231 100644 --- 
a/src/cpu/kernels/CpuActivationKernel.cpp +++ b/src/cpu/kernels/CpuActivationKernel.cpp @@ -43,6 +43,13 @@ namespace kernels { namespace { + +bool is_fp16_lut_supported(ActivationLayerInfo::ActivationFunction func) +{ + return func == ActivationLayerInfo::ActivationFunction::LOGISTIC || + func == ActivationLayerInfo::ActivationFunction::TANH; +} + static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels = { #ifdef ARM_COMPUTE_ENABLE_SVE {"sve2_q8_activation_lut", @@ -85,10 +92,7 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)}, {"sve_fp16_activation_lut", [](const ActivationDataTypeISASelectorData &data) - { - return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve && - data.f == ActivationLayerInfo::ActivationFunction::LOGISTIC; - }, + { return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve && is_fp16_lut_supported(data.f); }, REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation_lut)}, {"sve_fp16_activation", [](const ActivationDataTypeISASelectorData &data) @@ -299,10 +303,10 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac activation_info.setLookupTable256(tmp_lut); } - if (src->data_type() == DataType::F16 && - activation_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + if (std::string(uk->name) == "sve_fp16_activation_lut") { - const LUTInfo info = {activation_info.activation(), src->data_type(), src->quantization_info()}; + const LUTInfo info = {activation_info.activation(), activation_info.a(), activation_info.b(), src->data_type(), + src->quantization_info().uniform()}; activation_info.setLookupTable65536((lut_manager.get_lut_table(info))); } #endif // __aarch64__ diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp index 60fda511e3..6a93be0618 100644 --- 
a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp +++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,7 +81,7 @@ void vector_matrix_multiply_f16( // window_end_x is computed above which may cause out-of-bound writes to the dst. for (; x < (window_end_x - window_step_x); x += window_step_x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } @@ -176,7 +176,7 @@ void vector_matrix_multiply_f16( for (; x < window_end_x; ++x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index d4d6193fce..baffa8cbb2 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,10 +32,21 @@ namespace arm_compute { +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) OMPScheduler::OMPScheduler() // NOLINT - : _num_threads(omp_get_max_threads()) + : _num_threads(cpu_info().get_cpu_num_excluding_little()), + _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) { } +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ +OMPScheduler::OMPScheduler() // NOLINT + : _num_threads(omp_get_max_threads()), _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) +{ +} +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ unsigned int OMPScheduler::num_threads() const { @@ -45,7 +56,15 @@ unsigned int 
OMPScheduler::num_threads() const void OMPScheduler::set_num_threads(unsigned int num_threads) { const unsigned int num_cores = omp_get_max_threads(); - _num_threads = (num_threads == 0) ? num_cores : num_threads; +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) + const unsigned int adjusted_num_threads = std::min(_nonlittle_num_cpus, num_threads); + _num_threads = (num_threads == 0) ? num_cores : adjusted_num_threads; +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + _num_threads = (num_threads == 0) ? num_cores : num_threads; +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ } void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) @@ -99,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> } ThreadInfo info; - info.cpu_info = &cpu_info(); + info.cpu_info = &cpu_info(); + +#if !defined(__ANDROID__) + info.num_threads = _num_threads; +#else /* !__ANDROID__ */ info.num_threads = num_threads_to_use; -#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \ +#endif /* __ANDROID__ */ + +#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \ schedule(static, 1) for (unsigned int wid = 0; wid < amount_of_work; ++wid) { |