aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/cpuinfo/CpuInfo.cpp50
-rw-r--r--src/common/cpuinfo/CpuInfo.h9
-rw-r--r--src/core/CPP/CPPTypes.cpp14
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp6
-rw-r--r--src/core/helpers/LUTManager.cpp27
-rw-r--r--src/core/helpers/LUTManager.h18
-rw-r--r--src/cpu/kernels/CpuActivationKernel.cpp18
-rw-r--r--src/cpu/kernels/CpuDequantizeKernel.cpp2
-rw-r--r--src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp6
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp35
14 files changed, 162 insertions, 47 deletions
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp
index 809ab3e2c3..d46d8d7773 100644
--- a/src/common/cpuinfo/CpuInfo.cpp
+++ b/src/common/cpuinfo/CpuInfo.cpp
@@ -29,6 +29,7 @@
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
+#include <map>
#include <sstream>
#if !defined(BARE_METAL)
@@ -269,6 +270,46 @@ int get_max_cpus()
}
return max_cpus;
}
+#if defined(__ANDROID__)
+std::vector<uint32_t> get_cpu_capacities()
+{
+ std::vector<uint32_t> cpu_capacities;
+ for (int i = 0; i < get_max_cpus(); ++i)
+ {
+ std::stringstream str;
+ str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity";
+ std::ifstream file(str.str(), std::ios::in);
+ if (file.is_open())
+ {
+ std::string line;
+ if (bool(getline(file, line)))
+ {
+ cpu_capacities.emplace_back(support::cpp11::stoul(line));
+ }
+ }
+ }
+
+ return cpu_capacities;
+}
+
+uint32_t not_little_num_cpus_internal()
+{
+ std::vector<uint32_t> cpus_all = get_cpu_capacities();
+ std::vector<uint32_t> cpus_not_little;
+
+ std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end());
+ uint32_t max_capacity = *result;
+ uint32_t threshold = max_capacity / 2;
+ for (unsigned int i = 0; i < cpus_all.size(); i++)
+ {
+ if (!(cpus_all[i] < threshold))
+ {
+ cpus_not_little.emplace_back(cpus_all[i]);
+ }
+ }
+ return cpus_not_little.size();
+}
+#endif /* defined(__ANDROID__) */
#elif defined(__aarch64__) && \
defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
/** Query features through sysctlbyname
@@ -402,6 +443,15 @@ uint32_t CpuInfo::num_cpus() const
return _cpus.size();
}
+uint32_t CpuInfo::not_little_num_cpus() const
+{
+#if defined(__ANDROID__)
+ return not_little_num_cpus_internal();
+#else /* defined(__ANDROID__) */
+ return num_cpus();
+#endif /* defined(__ANDROID__) */
+}
+
uint32_t num_threads_hint()
{
unsigned int num_threads_hint = 1;
diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h
index 953e4883c3..78d11e9610 100644
--- a/src/common/cpuinfo/CpuInfo.h
+++ b/src/common/cpuinfo/CpuInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_COMMON_CPUINFO_H
-#define SRC_COMMON_CPUINFO_H
+#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H
+#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H
#include "src/common/cpuinfo/CpuIsaInfo.h"
#include "src/common/cpuinfo/CpuModel.h"
@@ -120,6 +120,7 @@ public:
CpuModel cpu_model(uint32_t cpuid) const;
CpuModel cpu_model() const;
uint32_t num_cpus() const;
+ uint32_t not_little_num_cpus() const;
private:
CpuIsaInfo _isa{};
@@ -135,4 +136,4 @@ private:
uint32_t num_threads_hint();
} // namespace cpuinfo
} // namespace arm_compute
-#endif /* SRC_COMMON_CPUINFO_H */
+#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index f6761f27b0..ee39210fa5 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -140,10 +140,20 @@ unsigned int CPUInfo::get_L2_cache_size() const
unsigned long CPUInfo::get_sme2_vector_length() const
{
#ifdef ARM_COMPUTE_ENABLE_SME2
- return arm_gemm::utils::sme::get_vector_length<int8_t>();
+ if (this->has_sme2())
+ return arm_gemm::utils::sme::get_vector_length<int8_t>();
+ else
+ return 0;
#else // ARM_COMPUTE_ENABLE_SME2
return 0;
#endif // ARM_COMPUTE_ENABLE_SME2
}
-
+unsigned int CPUInfo::get_cpu_num_excluding_little() const
+{
+#if defined(__ANDROID__)
+ return _impl->info.not_little_num_cpus();
+#else /* defined(__ANDROID__) */
+ return get_cpu_num();
+#endif /* defined(__ANDROID__) */
+}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 717fd11485..153c36052a 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = {
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)},
#endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#if defined(ARM_COMPUTE_ENABLE_NEON)
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#if ARM_COMPUTE_ENABLE_FP16
{"neon_fp16_batch_normalization",
[](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)},
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
{"neon_fp32_batch_normalization",
[](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)},
diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
index cb869838e2..694def1a3a 100644
--- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
+++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = {
{"fp32_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
#if defined(ARM_COMPUTE_ENABLE_NEON)
{"qu16_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; },
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
index 549319e49f..e23e3d020f 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = {
{"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)},
#endif //defined(ARM_COMPUTE_ENABLE_NEON)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
{"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)},
};
diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
index 0a1780f6ee..5883731088 100644
--- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -70,10 +70,10 @@ struct InstanceNormKernel
static const InstanceNormKernel available_kernels[] = {
{"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
};
/** Micro-kernel selector
diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
index 451031d696..cfe4ac9a4c 100644
--- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,10 +60,10 @@ struct MeanStdDevNormKernel
static const std::vector<MeanStdDevNormKernel> available_kernels = {
{"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
{"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)},
};
diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp
index 06e35eed8c..2effffbe92 100644
--- a/src/core/helpers/LUTManager.cpp
+++ b/src/core/helpers/LUTManager.cpp
@@ -30,17 +30,38 @@ namespace arm_compute
namespace
{
-void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut)
+float16_t activation(float16_t x, const LUTInfo &info)
+{
+ float16_t out = 0.f;
+ switch (info.act)
+ {
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ out = 1.f / (1.f + std::exp(-x));
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ {
+ out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table");
+ break;
+ }
+ return out;
+}
+
+void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info)
{
union Element
{
uint16_t i = 0;
float16_t fp;
} item;
+
// Fill lut by iterating over all 16 bit values using the union.
while (true)
{
- (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp));
+ (*lut)[item.i] = activation(item.fp, info);
if (item.i == 65535)
break;
item.i++;
@@ -62,7 +83,7 @@ std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table
// Not found, or pointer not valid
// We do not use make_shared to prevent the weak_ptr keeping the control block alive
std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536);
- init_lut_fp16(ptr.get());
+ init_lut_fp16(ptr.get(), info);
map_fp16[info] = ptr;
return ptr;
}
diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h
index 4e13ead7e3..f3f4bf2832 100644
--- a/src/core/helpers/LUTManager.h
+++ b/src/core/helpers/LUTManager.h
@@ -38,19 +38,23 @@ namespace arm_compute
struct LUTInfo
{
ActivationLayerInfo::ActivationFunction act;
+ float alpha;
+ float beta;
DataType dt;
- QuantizationInfo qinfo;
+ UniformQuantizationInfo qinfo;
+
// Operators enable use of map with Lutinfo as key
friend bool operator<(const LUTInfo &l, const LUTInfo &r)
{
- return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) ||
- ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) ||
- ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) &&
- (l.qinfo.offset() < l.qinfo.offset()));
+ const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset);
+ const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset);
+
+ return l_tup < r_tup;
}
- bool operator==(const LUTInfo &l)
+ bool operator==(const LUTInfo &l) const
{
- return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo;
+ return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt &&
+ this->qinfo == l.qinfo;
}
};
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 7cfa39b286..4253027231 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -43,6 +43,13 @@ namespace kernels
{
namespace
{
+
+bool is_fp16_lut_supported(ActivationLayerInfo::ActivationFunction func)
+{
+ return func == ActivationLayerInfo::ActivationFunction::LOGISTIC ||
+ func == ActivationLayerInfo::ActivationFunction::TANH;
+}
+
static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels = {
#ifdef ARM_COMPUTE_ENABLE_SVE
{"sve2_q8_activation_lut",
@@ -85,10 +92,7 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)},
{"sve_fp16_activation_lut",
[](const ActivationDataTypeISASelectorData &data)
- {
- return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve &&
- data.f == ActivationLayerInfo::ActivationFunction::LOGISTIC;
- },
+ { return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve && is_fp16_lut_supported(data.f); },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation_lut)},
{"sve_fp16_activation",
[](const ActivationDataTypeISASelectorData &data)
@@ -299,10 +303,10 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
activation_info.setLookupTable256(tmp_lut);
}
- if (src->data_type() == DataType::F16 &&
- activation_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ if (std::string(uk->name) == "sve_fp16_activation_lut")
{
- const LUTInfo info = {activation_info.activation(), src->data_type(), src->quantization_info()};
+ const LUTInfo info = {activation_info.activation(), activation_info.a(), activation_info.b(), src->data_type(),
+ src->quantization_info().uniform()};
activation_info.setLookupTable65536((lut_manager.get_lut_table(info)));
}
#endif // __aarch64__
diff --git a/src/cpu/kernels/CpuDequantizeKernel.cpp b/src/cpu/kernels/CpuDequantizeKernel.cpp
index 6154ad3e3b..5595ace998 100644
--- a/src/cpu/kernels/CpuDequantizeKernel.cpp
+++ b/src/cpu/kernels/CpuDequantizeKernel.cpp
@@ -85,7 +85,7 @@ void CpuDequantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
break;
#ifdef ARM_COMPUTE_ENABLE_FP16
case DataType::F16:
- _func = REGISTER_FP32_NEON(fp16_run_dequantization_core);
+ _func = REGISTER_FP16_NEON(fp16_run_dequantization_core);
break;
#endif /* ARM_COMPUTE_ENABLE_FP16 */
default:
diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
index 60fda511e3..6a93be0618 100644
--- a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -81,7 +81,7 @@ void vector_matrix_multiply_f16(
// window_end_x is computed above which may cause out-of-bound writes to the dst.
for (; x < (window_end_x - window_step_x); x += window_step_x)
{
- if (x > width_matrix_b)
+ if (x >= width_matrix_b)
{
return;
}
@@ -176,7 +176,7 @@ void vector_matrix_multiply_f16(
for (; x < window_end_x; ++x)
{
- if (x > width_matrix_b)
+ if (x >= width_matrix_b)
{
return;
}
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index d4d6193fce..baffa8cbb2 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2023 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,10 +32,21 @@
namespace arm_compute
{
+#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
OMPScheduler::OMPScheduler() // NOLINT
- : _num_threads(omp_get_max_threads())
+ : _num_threads(cpu_info().get_cpu_num_excluding_little()),
+ _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little())
{
}
+#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+OMPScheduler::OMPScheduler() // NOLINT
+ : _num_threads(omp_get_max_threads()), _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little())
+{
+}
+#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
unsigned int OMPScheduler::num_threads() const
{
@@ -45,7 +56,15 @@ unsigned int OMPScheduler::num_threads() const
void OMPScheduler::set_num_threads(unsigned int num_threads)
{
const unsigned int num_cores = omp_get_max_threads();
- _num_threads = (num_threads == 0) ? num_cores : num_threads;
+#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
+ const unsigned int adjusted_num_threads = std::min(_nonlittle_num_cpus, num_threads);
+ _num_threads = (num_threads == 0) ? num_cores : adjusted_num_threads;
+#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+ _num_threads = (num_threads == 0) ? num_cores : num_threads;
+#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
}
void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
@@ -99,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload>
}
ThreadInfo info;
- info.cpu_info = &cpu_info();
+ info.cpu_info = &cpu_info();
+
+#if !defined(__ANDROID__)
+ info.num_threads = _num_threads;
+#else /* !__ANDROID__ */
info.num_threads = num_threads_to_use;
-#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \
+#endif /* __ANDROID__ */
+
+#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \
schedule(static, 1)
for (unsigned int wid = 0; wid < amount_of_work; ++wid)
{