aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt2
-rw-r--r--README.md24
-rw-r--r--SConscript30
-rw-r--r--docs/user_guide/errata.dox10
-rw-r--r--docs/user_guide/release_version_and_change_log.dox4
-rw-r--r--src/core/CPP/CPPTypes.cpp5
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp6
-rw-r--r--src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp6
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp10
-rw-r--r--tests/SConscript25
-rw-r--r--tests/framework/SConscript20
-rw-r--r--tests/validation/fixtures/DirectConvolution3DFixture.h5
-rw-r--r--tests/validation/reference/Conv3D.cpp24
-rw-r--r--tests/validation/reference/Conv3D.h10
18 files changed, 145 insertions, 60 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c67479ce41..f291534201 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
- VERSION 36.0.0
+ VERSION 38.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
diff --git a/README.md b/README.md
index 71bff561a6..02dd05edac 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
</div>
-# Compute Library ![](https://img.shields.io/badge/latest_release-{{UNRELEASED}}-green)
+# Compute Library ![](https://img.shields.io/badge/latest_release-24.06-green)
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
<br>
## Documentation
-[![Documentation](https://img.shields.io/badge/documentation-{{UNRELEASED}}-green)](https://arm-software.github.io/ComputeLibrary/latest)
+[![Documentation](https://img.shields.io/badge/documentation-24.06-green)](https://arm-software.github.io/ComputeLibrary/latest)
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
@@ -50,24 +50,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) |
-| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) |
-| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) |
+| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
<br>
| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
-| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon-cl.tar.gz) |
-| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
+| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon-cl.tar.gz) |
+| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
<br>
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v{{UNRELEASED}}-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v{{UNRELEASED}})
+Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.06-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.06)
Pre-build binaries are generated with the following security / good coding practices related flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
diff --git a/SConscript b/SConscript
index 80aa87cae8..069e0906f4 100644
--- a/SConscript
+++ b/SConscript
@@ -31,9 +31,10 @@ import zlib
import json
import codecs
import platform
+import SCons
-VERSION = "v0.0-unreleased"
-LIBRARY_VERSION_MAJOR = 36
+VERSION = "v24.06"
+LIBRARY_VERSION_MAJOR = 38
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -151,6 +152,31 @@ def get_ckw_obj_list():
def build_library(name, build_env, sources, static=False, libs=[]):
cloned_build_env = build_env.Clone()
+
+ #Set up to use temp file for long command when building and linking libraries
+ cloned_build_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+ #To use temp file for any command, the following pattern should be used:
+ # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+ #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+ #The commands' string are taken from https://github.com/SCons/scons
+ #The commands' explanations are taken from Scons userguide
+
+ #The command line used to compile C++ source file to an object file
+ cloned_build_env['CXXCOM'] = "${TEMPFILE('"+ cloned_build_env['CXXCOM'] + "')}"
+ #The command line used to compile C++ source file to a shared-library object file
+ cloned_build_env['SHCXXCOM'] = "${TEMPFILE('"+ cloned_build_env['SHCXXCOM'] + "')}"
+ #The command line used to generate a static library from object files
+ cloned_build_env['ARCOM'] = "${TEMPFILE('"+ cloned_build_env['ARCOM'] + "')}"
+ #The command line used to link object files into an executable
+ cloned_build_env['LINKCOM'] = "${TEMPFILE('"+ cloned_build_env['LINKCOM'] + "')}"
+ #The command line used to link programs using shared libraries
+ cloned_build_env['SHLINKCOM'] = "${TEMPFILE('"+ cloned_build_env['SHLINKCOM'] + "')}"
+ #The command line used to index a static library archive
+ cloned_build_env['RANLIBCOM'] = "${TEMPFILE('"+ cloned_build_env['RANLIBCOM'] + "')}"
+ #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+ cloned_build_env['TEMPFILEDIR'] = cloned_build_env['build_dir']
+
if env['os'] == 'android' and static == False:
cloned_build_env["LINKFLAGS"].remove('-pie')
cloned_build_env["LINKFLAGS"].remove('-static-libstdc++')
diff --git a/docs/user_guide/errata.dox b/docs/user_guide/errata.dox
index 056e45a432..c195dc7851 100644
--- a/docs/user_guide/errata.dox
+++ b/docs/user_guide/errata.dox
@@ -1,5 +1,5 @@
///
-/// Copyright (c) 2019-2023 Arm Limited.
+/// Copyright (c) 2019-2024 Arm Limited.
///
/// SPDX-License-Identifier: MIT
///
@@ -30,6 +30,14 @@ namespace arm_compute
@section S7_1_errata Errata
+- (COMPMID-6904) Fix out-of-bound memory write for non-optimized FP16 GeMM kernel.
+ - Versions: >= v17.09 && < v24.06
+ - Oses: Linux, Android, MacOS, Windows.
+ - Conditions:
+ - Compile the latest Arm Compute Library for armv8.2-a or multi_isa
+ - Device with FP16 support
+ - GeMM with beta coefficient != 0 or 1
+
- (COMPMID-6493) Crash when running Arm Compute Library compiled for SVE2 on a computer that support SVE only.
- Versions: >= v21.02 && <=v23.08
- OSs: Linux, Android.
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index d9c2c8476d..16664c8d84 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -41,7 +41,9 @@ If there is more than one release in a month then an extra sequential number is
@section S2_2_changelog Changelog
-v24.08 Public major release
+v24.06 Public minor release
+ - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a
+ - Fix OpenMP® thread scheduling for large machine
- Optimize CPU activation functions using LUT-based implementation:
- Tanh function for FP16.
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index 67fbce490f..ee39210fa5 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -140,7 +140,10 @@ unsigned int CPUInfo::get_L2_cache_size() const
unsigned long CPUInfo::get_sme2_vector_length() const
{
#ifdef ARM_COMPUTE_ENABLE_SME2
- return arm_gemm::utils::sme::get_vector_length<int8_t>();
+ if (this->has_sme2())
+ return arm_gemm::utils::sme::get_vector_length<int8_t>();
+ else
+ return 0;
#else // ARM_COMPUTE_ENABLE_SME2
return 0;
#endif // ARM_COMPUTE_ENABLE_SME2
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 717fd11485..153c36052a 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = {
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)},
#endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#if defined(ARM_COMPUTE_ENABLE_NEON)
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#if ARM_COMPUTE_ENABLE_FP16
{"neon_fp16_batch_normalization",
[](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)},
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
{"neon_fp32_batch_normalization",
[](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)},
diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
index cb869838e2..694def1a3a 100644
--- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
+++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = {
{"fp32_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
#if defined(ARM_COMPUTE_ENABLE_NEON)
{"qu16_neon_boundingboxtransform",
[](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; },
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
index 549319e49f..e23e3d020f 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = {
{"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)},
#endif //defined(ARM_COMPUTE_ENABLE_NEON)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
{"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)},
};
diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
index 0a1780f6ee..5883731088 100644
--- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -70,10 +70,10 @@ struct InstanceNormKernel
static const InstanceNormKernel available_kernels[] = {
{"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
};
/** Micro-kernel selector
diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
index 451031d696..cfe4ac9a4c 100644
--- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,10 +60,10 @@ struct MeanStdDevNormKernel
static const std::vector<MeanStdDevNormKernel> available_kernels = {
{"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
{"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
{"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)},
};
diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
index 60fda511e3..6a93be0618 100644
--- a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -81,7 +81,7 @@ void vector_matrix_multiply_f16(
// window_end_x is computed above which may cause out-of-bound writes to the dst.
for (; x < (window_end_x - window_step_x); x += window_step_x)
{
- if (x > width_matrix_b)
+ if (x >= width_matrix_b)
{
return;
}
@@ -176,7 +176,7 @@ void vector_matrix_multiply_f16(
for (; x < window_end_x; ++x)
{
- if (x > width_matrix_b)
+ if (x >= width_matrix_b)
{
return;
}
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index aba5ff2902..baffa8cbb2 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -118,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload>
}
ThreadInfo info;
- info.cpu_info = &cpu_info();
+ info.cpu_info = &cpu_info();
+
+#if !defined(__ANDROID__)
+ info.num_threads = _num_threads;
+#else /* !__ANDROID__ */
info.num_threads = num_threads_to_use;
-#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \
+#endif /* __ANDROID__ */
+
+#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \
schedule(static, 1)
for (unsigned int wid = 0; wid < amount_of_work; ++wid)
{
diff --git a/tests/SConscript b/tests/SConscript
index 0907c5713b..aa5a85a923 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (c) 2017-2023,2024 Arm Limited.
+# Copyright (c) 2017-2023, 2024 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
@@ -23,6 +23,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os.path
+import SCons
Import('env')
Import('vars')
@@ -187,7 +188,27 @@ if env['fixed_format_kernels'] and test_env['validation_tests']:
test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
if test_env['validation_tests']:
- arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ])
+ #Set up to use temp file for long command when building and linking libraries
+ test_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+ #To use temp file for any command, the following pattern should be used:
+ # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+ #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+ #The commands' string are taken from https://github.com/SCons/scons
+ #The commands' explanations are taken from Scons userguide
+
+ #The command line used to compile C++ source file to an object files
+ test_env['CXXCOM'] = "${TEMPFILE('"+ test_env['CXXCOM'] + "')}"
+ #The command line used to generate a static library from object files
+ test_env['ARCOM'] = "${TEMPFILE('"+ test_env['ARCOM'] + "')}"
+ #The command line used to index a static library archive
+ test_env['RANLIBCOM'] = "${TEMPFILE('"+ test_env['RANLIBCOM'] + "')}"
+ #The command line used to link object files into an executable
+ test_env['LINKCOM'] = "${TEMPFILE('"+ test_env['LINKCOM'] + "')}"
+ #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+ test_env['TEMPFILEDIR'] = test_env['build_dir']
+
+ arm_compute_validation_framework = test_env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ])
Depends(arm_compute_validation_framework , arm_compute_test_framework)
program_objects = files_validation + common_objects
diff --git a/tests/framework/SConscript b/tests/framework/SConscript
index 450ffd77b0..65fd5d5b77 100644
--- a/tests/framework/SConscript
+++ b/tests/framework/SConscript
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (c) 2017-2022 Arm Limited.
+# Copyright (c) 2017-2022, 2024 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
@@ -75,6 +75,24 @@ if not framework_env['mali']:
else:
framework_env.Append(CPPDEFINES = ['MALI_ENABLED'])
+#Set up to use temp file for long command when building and linking libraries
+framework_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+#To use temp file for any command, the following pattern should be used:
+# env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+#See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+#The commands' string are taken from https://github.com/SCons/scons
+#The commands' explanations are taken from Scons userguide
+
+#The command line used to compile C++ source file to an object file
+framework_env['CXXCOM'] = "${TEMPFILE('"+ framework_env['CXXCOM'] + "')}"
+#The command line used to generate a static library from object files
+framework_env['ARCOM'] = "${TEMPFILE('"+ framework_env['ARCOM'] + "')}"
+#The command line used to index a static library archive
+framework_env['RANLIBCOM'] = "${TEMPFILE('"+ framework_env['RANLIBCOM'] + "')}"
+#Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+framework_env['TEMPFILEDIR'] = framework_env['build_dir']
+
arm_compute_test_framework = framework_env.StaticLibrary('arm_compute_test_framework', files)
Default(arm_compute_test_framework)
diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h
index e80ad2f54f..e27a41a23b 100644
--- a/tests/validation/fixtures/DirectConvolution3DFixture.h
+++ b/tests/validation/fixtures/DirectConvolution3DFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2023 Arm Limited.
+ * Copyright (c) 2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture
{
public:
using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
+ using TAcc = typename std::conditional < std::is_integral<T>::value, int32_t, float >::type;
void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth,
unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout,
@@ -150,7 +151,7 @@ protected:
fill(bias, 2);
}
- return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info);
+ return reference::activation_layer(reference::conv3d<T, TBias, TAcc>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info);
}
TensorType _target{};
diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp
index e4010a507a..38472a9aec 100644
--- a/tests/validation/reference/Conv3D.cpp
+++ b/tests/validation/reference/Conv3D.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2023 Arm Limited.
+ * Copyright (c) 2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ inline bool is_valid_pixel(int i, int min, int max)
}
// Evaluate the weights against an element in a given tensor.
-template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
+template < typename T, typename TB, typename TACC, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
{
@@ -73,7 +73,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
const unsigned int src_height = src.shape()[height_dim];
const unsigned int src_depth = src.shape()[depth_dim];
- T total(0);
+ TACC total(0);
for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
{
const int idx_z = z_start + dilation.depth * weight_d;
@@ -112,10 +112,10 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
const TB *b_ptr = bias.data();
TB bias_value = b_ptr[ch_out];
- return total + bias_value;
+ return static_cast<T>(total) + bias_value;
}
-template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
+template < typename T, typename TB, typename TACC, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
{
@@ -143,7 +143,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
const float multiplier = input_scale * weights_scale / output_scale;
arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
- int32_t total(0);
+ TACC total(0);
for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
{
const int idx_z = z_start + dilation.depth * weight_d;
@@ -189,7 +189,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
}
} // namespace
-template <typename T, typename TB>
+template <typename T, typename TB, typename TACC = T>
SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info)
{
// Compute reference
@@ -237,7 +237,7 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight
T *out_ptr = dst.data();
const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch });
- out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
+ out_ptr[out_offset] = calculate_conv3d<T, TB, TACC>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
}
}
}
@@ -246,13 +246,13 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight
return dst;
}
-template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
+template SimpleTensor<float> conv3d<float, float, float>(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
const Conv3dInfo &conv3d_info);
-template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
+template SimpleTensor<half> conv3d<half, half, float>(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
const Conv3dInfo &conv3d_info);
-template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
+template SimpleTensor<uint8_t> conv3d<uint8_t, int32_t, int32_t>(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
const Conv3dInfo &conv3d_info);
-template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
+template SimpleTensor<int8_t> conv3d<int8_t, int32_t, int32_t>(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
const Conv3dInfo &conv3d_info);
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h
index e3674f4bfb..a440b15d55 100644
--- a/tests/validation/reference/Conv3D.h
+++ b/tests/validation/reference/Conv3D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H
-#define ARM_COMPUTE_TEST_CONV3D_LAYER_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H
+#define ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H
#include "Utils.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
@@ -37,11 +37,11 @@ namespace validation
{
namespace reference
{
-template <typename T, typename TB>
+template <typename T, typename TB, typename TACC>
SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst,
const Conv3dInfo &conv3d_info);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H