diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | README.md | 24 | ||||
-rw-r--r-- | SConscript | 30 | ||||
-rw-r--r-- | docs/user_guide/errata.dox | 10 | ||||
-rw-r--r-- | docs/user_guide/release_version_and_change_log.dox | 4 | ||||
-rw-r--r-- | src/core/CPP/CPPTypes.cpp | 5 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | 6 | ||||
-rw-r--r-- | src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp | 6 | ||||
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 10 | ||||
-rw-r--r-- | tests/SConscript | 25 | ||||
-rw-r--r-- | tests/framework/SConscript | 20 | ||||
-rw-r--r-- | tests/validation/fixtures/DirectConvolution3DFixture.h | 5 | ||||
-rw-r--r-- | tests/validation/reference/Conv3D.cpp | 24 | ||||
-rw-r--r-- | tests/validation/reference/Conv3D.h | 10 |
18 files changed, 145 insertions, 60 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index c67479ce41..f291534201 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute) project( ArmCompute - VERSION 36.0.0 + VERSION 38.0.0 DESCRIPTION "The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures" LANGUAGES C CXX ASM) @@ -9,7 +9,7 @@ <img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br> </div> -# Compute Library ![](https://img.shields.io/badge/latest_release-{{UNRELEASED}}-green) +# Compute Library ![](https://img.shields.io/badge/latest_release-24.06-green) The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br> @@ -37,7 +37,7 @@ Key Features: <br> ## Documentation -[![Documentation](https://img.shields.io/badge/documentation-{{UNRELEASED}}-green)](https://arm-software.github.io/ComputeLibrary/latest) +[![Documentation](https://img.shields.io/badge/documentation-24.06-green)](https://arm-software.github.io/ComputeLibrary/latest) > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc. @@ -50,24 +50,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C | Platform | Operating System | Release archive (Download) | | -------------- | ---------------- | -------------------------- | -| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) | -| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) | -| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) | +| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) | +| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | <br> | Architecture | Operating System | Release archive (Download) | | ------------ | ---------------- | -------------------------- | -| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon-cl.tar.gz) | -| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon-cl.tar.gz) | -| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) | +| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon-cl.tar.gz) | +| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon-cl.tar.gz) | +| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon-cl.tar.gz) | +| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) | <br> -Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v{{UNRELEASED}}-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v{{UNRELEASED}}) +Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.06-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.06) Pre-build binaries are generated with the following security / good coding practices related flags: > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong diff --git a/SConscript b/SConscript index 80aa87cae8..069e0906f4 100644 --- a/SConscript +++ b/SConscript @@ -31,9 +31,10 @@ import zlib import json import codecs import platform +import SCons -VERSION = "v0.0-unreleased" -LIBRARY_VERSION_MAJOR = 36 +VERSION = "v24.06" +LIBRARY_VERSION_MAJOR = 38 LIBRARY_VERSION_MINOR = 0 LIBRARY_VERSION_PATCH = 0 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH) @@ -151,6 +152,31 @@ def get_ckw_obj_list(): def build_library(name, build_env, sources, static=False, libs=[]): cloned_build_env = build_env.Clone() + + #Set up to use temp file for long command when building and linking libraries + cloned_build_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any command, the following pattern should be used: + # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The commands' string are taken from https://github.com/SCons/scons + #The commands' explanations are taken from Scons userguide + + #The command line used to compile C++ source file to an object file + cloned_build_env['CXXCOM'] = "${TEMPFILE('"+ cloned_build_env['CXXCOM'] + "')}" + #The command line used to compile C++ source file to a shared-library object file + cloned_build_env['SHCXXCOM'] = "${TEMPFILE('"+ cloned_build_env['SHCXXCOM'] + "')}" + #The command line used to generate a static library from object files + cloned_build_env['ARCOM'] = "${TEMPFILE('"+ cloned_build_env['ARCOM'] + "')}" + #The command line used to link object files into an executable + cloned_build_env['LINKCOM'] = "${TEMPFILE('"+ cloned_build_env['LINKCOM'] + "')}" + #The command line used to link programs using shared libraries + cloned_build_env['SHLINKCOM'] = "${TEMPFILE('"+ cloned_build_env['SHLINKCOM'] + "')}" + #The command line used to index a static library archive + cloned_build_env['RANLIBCOM'] = "${TEMPFILE('"+ cloned_build_env['RANLIBCOM'] + "')}" + #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files + cloned_build_env['TEMPFILEDIR'] = cloned_build_env['build_dir'] + if env['os'] == 'android' and static == False: cloned_build_env["LINKFLAGS"].remove('-pie') cloned_build_env["LINKFLAGS"].remove('-static-libstdc++') diff --git a/docs/user_guide/errata.dox b/docs/user_guide/errata.dox index 056e45a432..c195dc7851 100644 --- a/docs/user_guide/errata.dox +++ b/docs/user_guide/errata.dox @@ -1,5 +1,5 @@ /// -/// Copyright (c) 2019-2023 Arm Limited. +/// Copyright (c) 2019-2024 Arm Limited. /// /// SPDX-License-Identifier: MIT /// @@ -30,6 +30,14 @@ namespace arm_compute @section S7_1_errata Errata +- (COMPMID-6904) Fix out-of-bound memory write for non-optimized FP16 GeMM kernel. + - Versions: >= v17.09 && < v24.06 + - Oses: Linux, Android, MacOS, Windows. + - Conditions: + - Compile the latest Arm Compute Library for armv8.2-a or multi_isa + - Device with FP16 support + - GeMM with beta coefficient != 0 or 1 + - (COMPMID-6493) Crash when running Arm Compute Library compiled for SVE2 on a computer that support SVE only. - Versions: >= v21.02 && <=v23.08 - OSs: Linux, Android. diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index d9c2c8476d..16664c8d84 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -41,7 +41,9 @@ If there is more than one release in a month then an extra sequential number is @section S2_2_changelog Changelog -v24.08 Public major release +v24.06 Public minor release + - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a + - Fix OpenMP® thread scheduling for large machine - Optimize CPU activation functions using LUT-based implementation: - Tanh function for FP16. diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index 67fbce490f..ee39210fa5 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -140,7 +140,10 @@ unsigned int CPUInfo::get_L2_cache_size() const unsigned long CPUInfo::get_sme2_vector_length() const { #ifdef ARM_COMPUTE_ENABLE_SME2 - return arm_gemm::utils::sme::get_vector_length<int8_t>(); + if (this->has_sme2()) + return arm_gemm::utils::sme::get_vector_length<int8_t>(); + else + return 0; #else // ARM_COMPUTE_ENABLE_SME2 return 0; #endif // ARM_COMPUTE_ENABLE_SME2 diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 717fd11485..153c36052a 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = { REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)}, #endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #if defined(ARM_COMPUTE_ENABLE_NEON) -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)}, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ {"neon_fp32_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)}, diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index cb869838e2..694def1a3a 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = { {"fp32_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_NEON) {"qu16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; }, diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index 549319e49f..e23e3d020f 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = { {"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; }, REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)}, #endif //defined(ARM_COMPUTE_ENABLE_NEON) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)}, }; diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 0a1780f6ee..5883731088 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -70,10 +70,10 @@ struct InstanceNormKernel static const InstanceNormKernel available_kernels[] = { {"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 }; /** Micro-kernel selector diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 451031d696..cfe4ac9a4c 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,10 +60,10 @@ struct MeanStdDevNormKernel static const std::vector<MeanStdDevNormKernel> available_kernels = { {"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)}, }; diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp index 60fda511e3..6a93be0618 100644 --- a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp +++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,7 +81,7 @@ void vector_matrix_multiply_f16( // window_end_x is computed above which may cause out-of-bound writes to the dst. for (; x < (window_end_x - window_step_x); x += window_step_x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } @@ -176,7 +176,7 @@ void vector_matrix_multiply_f16( for (; x < window_end_x; ++x) { - if (x > width_matrix_b) + if (x >= width_matrix_b) { return; } diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index aba5ff2902..baffa8cbb2 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -118,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> } ThreadInfo info; - info.cpu_info = &cpu_info(); + info.cpu_info = &cpu_info(); + +#if !defined(__ANDROID__) + info.num_threads = _num_threads; +#else /* !__ANDROID__ */ info.num_threads = num_threads_to_use; -#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \ +#endif /* __ANDROID__ */ + +#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \ schedule(static, 1) for (unsigned int wid = 0; wid < amount_of_work; ++wid) { diff --git a/tests/SConscript b/tests/SConscript index 0907c5713b..aa5a85a923 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2023,2024 Arm Limited. +# Copyright (c) 2017-2023, 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -23,6 +23,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import os.path +import SCons Import('env') Import('vars') @@ -187,7 +188,27 @@ if env['fixed_format_kernels'] and test_env['validation_tests']: test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS']) if test_env['validation_tests']: - arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) + #Set up to use temp file for long command when building and linking libraries + test_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any command, the following pattern should be used: + # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The commands' string are taken from https://github.com/SCons/scons + #The commands' explanations are taken from Scons userguide + + #The command line used to compile C++ source file to an object files + test_env['CXXCOM'] = "${TEMPFILE('"+ test_env['CXXCOM'] + "')}" + #The command line used to generate a static library from object files + test_env['ARCOM'] = "${TEMPFILE('"+ test_env['ARCOM'] + "')}" + #The command line used to index a static library archive + test_env['RANLIBCOM'] = "${TEMPFILE('"+ test_env['RANLIBCOM'] + "')}" + #The command line used to link object files into an executable + test_env['LINKCOM'] = "${TEMPFILE('"+ test_env['LINKCOM'] + "')}" + #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files + test_env['TEMPFILEDIR'] = test_env['build_dir'] + + arm_compute_validation_framework = test_env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) Depends(arm_compute_validation_framework , arm_compute_test_framework) program_objects = files_validation + common_objects diff --git a/tests/framework/SConscript b/tests/framework/SConscript index 450ffd77b0..65fd5d5b77 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2022 Arm Limited. +# Copyright (c) 2017-2022, 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -75,6 +75,24 @@ if not framework_env['mali']: else: framework_env.Append(CPPDEFINES = ['MALI_ENABLED']) +#Set up to use temp file for long command when building and linking libraries +framework_env['TEMPFILE'] = SCons.Platform.TempFileMunge + +#To use temp file for any command, the following pattern should be used: +# env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" +#See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 +#The commands' string are taken from https://github.com/SCons/scons +#The commands' explanations are taken from Scons userguide + +#The command line used to compile C++ source file to an object file +framework_env['CXXCOM'] = "${TEMPFILE('"+ framework_env['CXXCOM'] + "')}" +#The command line used to generate a static library from object files +framework_env['ARCOM'] = "${TEMPFILE('"+ framework_env['ARCOM'] + "')}" +#The command line used to index a static library archive +framework_env['RANLIBCOM'] = "${TEMPFILE('"+ framework_env['RANLIBCOM'] + "')}" +#Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files +framework_env['TEMPFILEDIR'] = framework_env['build_dir'] + arm_compute_test_framework = framework_env.StaticLibrary('arm_compute_test_framework', files) Default(arm_compute_test_framework) diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h index e80ad2f54f..e27a41a23b 100644 --- a/tests/validation/fixtures/DirectConvolution3DFixture.h +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture { public: using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; + using TAcc = typename std::conditional < std::is_integral<T>::value, int32_t, float >::type; void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout, @@ -150,7 +151,7 @@ protected: fill(bias, 2); } - return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); + return reference::activation_layer(reference::conv3d<T, TBias, TAcc>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); } TensorType _target{}; diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp index e4010a507a..38472a9aec 100644 --- a/tests/validation/reference/Conv3D.cpp +++ b/tests/validation/reference/Conv3D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ inline bool is_valid_pixel(int i, int min, int max) } // Evaluate the weights against an element in a given tensor. -template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > +template < typename T, typename TB, typename TACC, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -73,7 +73,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const unsigned int src_height = src.shape()[height_dim]; const unsigned int src_depth = src.shape()[depth_dim]; - T total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -112,10 +112,10 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const TB *b_ptr = bias.data(); TB bias_value = b_ptr[ch_out]; - return total + bias_value; + return static_cast<T>(total) + bias_value; } -template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > +template < typename T, typename TB, typename TACC, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -143,7 +143,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const float multiplier = input_scale * weights_scale / output_scale; arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); - int32_t total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -189,7 +189,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c } } // namespace -template <typename T, typename TB> +template <typename T, typename TB, typename TACC = T> SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info) { // Compute reference @@ -237,7 +237,7 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight T *out_ptr = dst.data(); const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); - out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); + out_ptr[out_offset] = calculate_conv3d<T, TB, TACC>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); } } } @@ -246,13 +246,13 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight return dst; } -template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, +template SimpleTensor<float> conv3d<float, float, float>(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, +template SimpleTensor<half> conv3d<half, half, float>(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, +template SimpleTensor<uint8_t> conv3d<uint8_t, int32_t, int32_t>(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, +template SimpleTensor<int8_t> conv3d<int8_t, int32_t, int32_t>(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h index e3674f4bfb..a440b15d55 100644 --- a/tests/validation/reference/Conv3D.h +++ b/tests/validation/reference/Conv3D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H -#define ARM_COMPUTE_TEST_CONV3D_LAYER_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H +#define ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H #include "Utils.h" #include "arm_compute/runtime/FunctionDescriptors.h" @@ -37,11 +37,11 @@ namespace validation { namespace reference { -template <typename T, typename TB> +template <typename T, typename TB, typename TACC> SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H |