18 files changed, 145 insertions, 60 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c67479ce41..f291534201 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
 project(
   ArmCompute
-  VERSION 36.0.0
+  VERSION 38.0.0
   DESCRIPTION
     "The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
   LANGUAGES C CXX ASM)
diff --git a/README.md b/README.md
index 71bff561a6..02dd05edac 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
  <img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
 </div>
 
-# Compute Library ![](https://img.shields.io/badge/latest_release-{{UNRELEASED}}-green)
+# Compute Library ![](https://img.shields.io/badge/latest_release-24.06-green)
 
 
 The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
 <br>
 
 ## Documentation
-[![Documentation](https://img.shields.io/badge/documentation-{{UNRELEASED}}-green)](https://arm-software.github.io/ComputeLibrary/latest)
+[![Documentation](https://img.shields.io/badge/documentation-24.06-green)](https://arm-software.github.io/ComputeLibrary/latest)
 
 > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
 
@@ -50,24 +50,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
 
 | Platform       | Operating System | Release archive (Download) |
 | -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) |
-| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) |
-| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) |
+| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
 
 <br>
 
 | Architecture | Operating System | Release archive (Download) |
 | ------------ | ---------------- | -------------------------- |
-| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-armv7a-neon-cl.tar.gz) |
-| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v{{UNRELEASED}}/arm_compute-v{{UNRELEASED}}-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
+| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-armv7a-neon-cl.tar.gz) |
+| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.06/arm_compute-v24.06-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
 
 <br>
 
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v{{UNRELEASED}}-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v{{UNRELEASED}})
+Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.06-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.06)
 
 Pre-build binaries are generated with the following security / good coding practices related flags:
 > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
diff --git a/SConscript b/SConscript
index 80aa87cae8..069e0906f4 100644
--- a/SConscript
+++ b/SConscript
@@ -31,9 +31,10 @@ import zlib
 import json
 import codecs
 import platform
+import SCons
 
-VERSION = "v0.0-unreleased"
-LIBRARY_VERSION_MAJOR = 36
+VERSION = "v24.06"
+LIBRARY_VERSION_MAJOR = 38
 LIBRARY_VERSION_MINOR =  0
 LIBRARY_VERSION_PATCH =  0
 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -151,6 +152,31 @@ def get_ckw_obj_list():
 
 def build_library(name, build_env, sources, static=False, libs=[]):
     cloned_build_env = build_env.Clone()
+
+    #Set up to use temp file for long command when building and linking libraries
+    cloned_build_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+    #To use temp file for any command, the following pattern should be used:
+    #   env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+    #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+    #The commands' string are taken from https://github.com/SCons/scons
+    #The commands' explanations are taken from Scons userguide
+
+    #The command line used to compile C++ source file to an object file
+    cloned_build_env['CXXCOM'] = "${TEMPFILE('"+ cloned_build_env['CXXCOM'] + "')}"
+    #The command line used to compile C++ source file to a shared-library object file
+    cloned_build_env['SHCXXCOM'] = "${TEMPFILE('"+ cloned_build_env['SHCXXCOM'] + "')}"
+    #The command line used to generate a static library from object files
+    cloned_build_env['ARCOM'] = "${TEMPFILE('"+ cloned_build_env['ARCOM'] + "')}"
+    #The command line used to link object files into an executable
+    cloned_build_env['LINKCOM'] = "${TEMPFILE('"+ cloned_build_env['LINKCOM'] + "')}"
+    #The command line used to link programs using shared libraries
+    cloned_build_env['SHLINKCOM'] = "${TEMPFILE('"+ cloned_build_env['SHLINKCOM'] + "')}"
+    #The command line used to index a static library archive
+    cloned_build_env['RANLIBCOM'] = "${TEMPFILE('"+ cloned_build_env['RANLIBCOM'] + "')}"
+    #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+    cloned_build_env['TEMPFILEDIR'] = cloned_build_env['build_dir']
+
     if env['os'] == 'android' and static == False:
         cloned_build_env["LINKFLAGS"].remove('-pie')
         cloned_build_env["LINKFLAGS"].remove('-static-libstdc++')
diff --git a/docs/user_guide/errata.dox b/docs/user_guide/errata.dox
index 056e45a432..c195dc7851 100644
--- a/docs/user_guide/errata.dox
+++ b/docs/user_guide/errata.dox
@@ -1,5 +1,5 @@
 ///
-/// Copyright (c) 2019-2023 Arm Limited.
+/// Copyright (c) 2019-2024 Arm Limited.
 ///
 /// SPDX-License-Identifier: MIT
 ///
@@ -30,6 +30,14 @@ namespace arm_compute
 
 @section S7_1_errata Errata
 
+- (COMPMID-6904) Fix out-of-bound memory write for non-optimized FP16 GeMM kernel.
+    - Versions: >= v17.09 && < v24.06
+    - Oses: Linux, Android, MacOS, Windows.
+    - Conditions:
+        - Compile the latest Arm Compute Library for armv8.2-a or multi_isa
+        - Device with FP16 support
+        - GeMM with beta coefficient != 0 or 1
+
 - (COMPMID-6493) Crash when running Arm Compute Library compiled for SVE2 on a computer that support SVE only.
     - Versions: >= v21.02 && <=v23.08
     - OSs: Linux, Android.
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index d9c2c8476d..16664c8d84 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -41,7 +41,9 @@ If there is more than one release in a month then an extra sequential number is
 
 @section S2_2_changelog Changelog
 
-v24.08 Public major release
+v24.06 Public minor release
+ - Enable FP16 in multiple Neon™ kernels for multi_isa + v8a
+ - Fix OpenMP® thread scheduling for large machine
  - Optimize CPU activation functions using LUT-based implementation:
    - Tanh function for FP16.
 
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index 67fbce490f..ee39210fa5 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -140,7 +140,10 @@ unsigned int CPUInfo::get_L2_cache_size() const
 unsigned long CPUInfo::get_sme2_vector_length() const
 {
 #ifdef ARM_COMPUTE_ENABLE_SME2
-    return arm_gemm::utils::sme::get_vector_length<int8_t>();
+    if (this->has_sme2())
+        return arm_gemm::utils::sme::get_vector_length<int8_t>();
+    else
+        return 0;
 #else  // ARM_COMPUTE_ENABLE_SME2
     return 0;
 #endif // ARM_COMPUTE_ENABLE_SME2
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 717fd11485..153c36052a 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = {
      REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)},
 #endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */
 #if defined(ARM_COMPUTE_ENABLE_NEON)
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#if ARM_COMPUTE_ENABLE_FP16
     {"neon_fp16_batch_normalization",
      [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; },
      REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)},
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
     {"neon_fp32_batch_normalization",
      [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; },
      REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)},
diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
index cb869838e2..694def1a3a 100644
--- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
+++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = {
     {"fp32_neon_boundingboxtransform",
      [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; },
      REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
     {"fp16_neon_boundingboxtransform",
      [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; },
      REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
 #if defined(ARM_COMPUTE_ENABLE_NEON)
     {"qu16_neon_boundingboxtransform",
      [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; },
diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
index 549319e49f..e23e3d020f 100644
--- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = {
     {"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; },
      REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)},
 #endif //defined(ARM_COMPUTE_ENABLE_NEON)
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
     {"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; },
      REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
     {"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; },
      REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)},
 };
diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
index 0a1780f6ee..5883731088 100644
--- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,10 +70,10 @@ struct InstanceNormKernel
 static const InstanceNormKernel available_kernels[] = {
     {"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; },
      REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
     {"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; },
      REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
 };
 
 /** Micro-kernel selector
diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
index 451031d696..cfe4ac9a4c 100644
--- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2022, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,10 +60,10 @@ struct MeanStdDevNormKernel
 static const std::vector<MeanStdDevNormKernel> available_kernels = {
     {"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; },
      REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)},
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef ARM_COMPUTE_ENABLE_FP16
     {"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; },
      REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)},
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // ARM_COMPUTE_ENABLE_FP16
     {"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; },
      REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)},
 };
diff --git a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
index 60fda511e3..6a93be0618 100644
--- a/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -81,7 +81,7 @@ void vector_matrix_multiply_f16(
             // window_end_x is computed above which may cause out-of-bound writes to the dst.
             for (; x < (window_end_x - window_step_x); x += window_step_x)
             {
-                if (x > width_matrix_b)
+                if (x >= width_matrix_b)
                 {
                     return;
                 }
@@ -176,7 +176,7 @@ void vector_matrix_multiply_f16(
 
             for (; x < window_end_x; ++x)
             {
-                if (x > width_matrix_b)
+                if (x >= width_matrix_b)
                 {
                     return;
                 }
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index aba5ff2902..baffa8cbb2 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -118,9 +118,15 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload>
     }
 
     ThreadInfo info;
-    info.cpu_info    = &cpu_info();
+    info.cpu_info = &cpu_info();
+
+#if !defined(__ANDROID__)
+    info.num_threads = _num_threads;
+#else  /* !__ANDROID__ */
     info.num_threads = num_threads_to_use;
-#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \
+#endif /* __ANDROID__ */
+
+#pragma omp parallel for firstprivate(info) num_threads(info.num_threads) default(shared) proc_bind(close) \
     schedule(static, 1)
     for (unsigned int wid = 0; wid < amount_of_work; ++wid)
     {
diff --git a/tests/SConscript b/tests/SConscript
index 0907c5713b..aa5a85a923 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 
-# Copyright (c) 2017-2023,2024 Arm Limited.
+# Copyright (c) 2017-2023, 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -23,6 +23,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 import os.path
+import SCons
 
 Import('env')
 Import('vars')
@@ -187,7 +188,27 @@ if env['fixed_format_kernels'] and test_env['validation_tests']:
     test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
 
 if test_env['validation_tests']:
-    arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ])
+    #Set up to use temp file for long command when building and linking libraries
+    test_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+    #To use temp file for any command, the following pattern should be used:
+    #   env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+    #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+    #The commands' string are taken from https://github.com/SCons/scons
+    #The commands' explanations are taken from Scons userguide
+
+    #The command line used to compile C++ source file to an object files
+    test_env['CXXCOM'] = "${TEMPFILE('"+ test_env['CXXCOM'] + "')}"
+    #The command line used to generate a static library from object files
+    test_env['ARCOM'] = "${TEMPFILE('"+ test_env['ARCOM'] + "')}"
+    #The command line used to index a static library archive
+    test_env['RANLIBCOM'] = "${TEMPFILE('"+ test_env['RANLIBCOM'] + "')}"
+    #The command line used to link object files into an executable
+    test_env['LINKCOM'] = "${TEMPFILE('"+ test_env['LINKCOM'] + "')}"
+    #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+    test_env['TEMPFILEDIR'] = test_env['build_dir']
+
+    arm_compute_validation_framework = test_env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ])
     Depends(arm_compute_validation_framework , arm_compute_test_framework)
 
     program_objects = files_validation + common_objects
diff --git a/tests/framework/SConscript b/tests/framework/SConscript
index 450ffd77b0..65fd5d5b77 100644
--- a/tests/framework/SConscript
+++ b/tests/framework/SConscript
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 
-# Copyright (c) 2017-2022 Arm Limited.
+# Copyright (c) 2017-2022, 2024 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -75,6 +75,24 @@ if not framework_env['mali']:
 else:
     framework_env.Append(CPPDEFINES = ['MALI_ENABLED'])
 
+#Set up to use temp file for long command when building and linking libraries
+framework_env['TEMPFILE'] = SCons.Platform.TempFileMunge
+
+#To use temp file for any command, the following pattern should be used:
+#   env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}"
+#See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147
+#The commands' string are taken from https://github.com/SCons/scons
+#The commands' explanations are taken from Scons userguide
+
+#The command line used to compile C++ source file to an object file
+framework_env['CXXCOM'] = "${TEMPFILE('"+ framework_env['CXXCOM'] + "')}"
+#The command line used to generate a static library from object files
+framework_env['ARCOM'] = "${TEMPFILE('"+ framework_env['ARCOM'] + "')}"
+#The command line used to index a static library archive
+framework_env['RANLIBCOM'] = "${TEMPFILE('"+ framework_env['RANLIBCOM'] + "')}"
+#Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files
+framework_env['TEMPFILEDIR'] = framework_env['build_dir']
+
 arm_compute_test_framework = framework_env.StaticLibrary('arm_compute_test_framework', files)
 
 Default(arm_compute_test_framework)
diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h
index e80ad2f54f..e27a41a23b 100644
--- a/tests/validation/fixtures/DirectConvolution3DFixture.h
+++ b/tests/validation/fixtures/DirectConvolution3DFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2023 Arm Limited.
+ * Copyright (c) 2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture
 {
 public:
     using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
+    using TAcc = typename std::conditional < std::is_integral<T>::value, int32_t, float >::type;
 
     void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth,
                unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout,
@@ -150,7 +151,7 @@ protected:
             fill(bias, 2);
         }
 
-        return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info);
+        return reference::activation_layer(reference::conv3d<T, TBias, TAcc>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info);
     }
 
     TensorType      _target{};
diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp
index e4010a507a..38472a9aec 100644
--- a/tests/validation/reference/Conv3D.cpp
+++ b/tests/validation/reference/Conv3D.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2023 Arm Limited.
+ * Copyright (c) 2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,7 +58,7 @@ inline bool is_valid_pixel(int i, int min, int max)
 }
 
 // Evaluate the weights against an element in a given tensor.
-template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
+template < typename T, typename TB, typename TACC, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
 T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
                    int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
 {
@@ -73,7 +73,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
     const unsigned int src_height   = src.shape()[height_dim];
     const unsigned int src_depth    = src.shape()[depth_dim];
 
-    T total(0);
+    TACC total(0);
     for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
     {
         const int idx_z = z_start + dilation.depth * weight_d;
@@ -112,10 +112,10 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
     const TB *b_ptr      = bias.data();
     TB        bias_value = b_ptr[ch_out];
 
-    return total + bias_value;
+    return static_cast<T>(total) + bias_value;
 }
 
-template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
+template < typename T, typename TB, typename TACC, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
 T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
                    int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
 {
@@ -143,7 +143,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
     const float multiplier        = input_scale * weights_scale / output_scale;
     arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
-    int32_t total(0);
+    TACC total(0);
     for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
     {
         const int idx_z = z_start + dilation.depth * weight_d;
@@ -189,7 +189,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c
 }
 } // namespace
 
-template <typename T, typename TB>
+template <typename T, typename TB, typename TACC = T>
 SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info)
 {
     // Compute reference
@@ -237,7 +237,7 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight
                         T *out_ptr = dst.data();
 
                         const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch });
-                        out_ptr[out_offset]  = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
+                        out_ptr[out_offset]  = calculate_conv3d<T, TB, TACC>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
                     }
                 }
             }
@@ -246,13 +246,13 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight
     return dst;
 }
 
-template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
+template SimpleTensor<float> conv3d<float, float, float>(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
                                     const Conv3dInfo &conv3d_info);
-template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
+template SimpleTensor<half> conv3d<half, half, float>(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
                                    const Conv3dInfo &conv3d_info);
-template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
+template SimpleTensor<uint8_t> conv3d<uint8_t, int32_t, int32_t>(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
                                       const Conv3dInfo &conv3d_info);
-template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
+template SimpleTensor<int8_t> conv3d<int8_t, int32_t, int32_t>(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
                                      const Conv3dInfo &conv3d_info);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h
index e3674f4bfb..a440b15d55 100644
--- a/tests/validation/reference/Conv3D.h
+++ b/tests/validation/reference/Conv3D.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H
-#define ARM_COMPUTE_TEST_CONV3D_LAYER_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H
+#define ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H
 
 #include "Utils.h"
 #include "arm_compute/runtime/FunctionDescriptors.h"
@@ -37,11 +37,11 @@ namespace validation
 {
 namespace reference
 {
-template <typename T, typename TB>
+template <typename T, typename TB, typename TACC>
 SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst,
                        const Conv3dInfo &conv3d_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H