aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-11-18 16:22:16 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-12-07 15:52:16 +0000
commite5a9ad8c81c9cf245011839fa373f3603d719ba8 (patch)
tree70058cd9a6e6ba3d3a83b252ebbfaf2ef3546d30
parent7b48166b37d30dc0d651e5f366a691b38a0c8006 (diff)
downloadComputeLibrary-e5a9ad8c81c9cf245011839fa373f3603d719ba8.tar.gz
COMPMID-3869: Update Sconstruct to support SVE/SVE2
Modifying scons to build with SVE/SVE2 Updating the documentation with examples Change-Id: I80875206599d5444b9c21ac75c4a8e4efd30d8b5 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4629 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--SConstruct17
-rw-r--r--docs/00_introduction.dox224
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp2
4 files changed, 174 insertions, 71 deletions
diff --git a/SConstruct b/SConstruct
index 395fb5e59d..e9ba496afb 100644
--- a/SConstruct
+++ b/SConstruct
@@ -43,8 +43,8 @@ vars.AddVariables(
BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False),
BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False),
EnumVariable("arch", "Target Architecture", "armv7a",
- allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64",
- "armv8a", "armv8.2-a", "armv8.2-a-sve", "armv8.6-a", "armv8.6-a-sve", "x86")),
+ allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "arm64-v8.2-a-sve2", "x86_32", "x86_64",
+ "armv8a", "armv8.2-a", "armv8.2-a-sve", "armv8.6-a", "armv8.6-a-sve", "armv8.6-a-sve2", "x86")),
EnumVariable("estate", "Execution State", "auto", allowed_values=("auto", "32", "64")),
EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "tizen", "bare_metal")),
EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile", "embed_only")),
@@ -72,7 +72,8 @@ vars.AddVariables(
("compiler_prefix", "Override the compiler prefix", ""),
("extra_cxx_flags", "Extra CXX flags to be appended to the build command", ""),
("extra_link_flags", "Extra LD flags to be appended to the build command", ""),
- ("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", "")
+ ("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", ""),
+ ("specs_file", "Specs file to use (e.g. rdimon.specs)", "")
)
env = Environment(platform="posix", variables=vars, ENV = os.environ)
@@ -202,7 +203,9 @@ if 'v7a' in env['arch']:
else:
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
elif 'v8' in env['arch']:
- if 'sve' in env['arch']:
+ if 'sve2' in env['arch']:
+ env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod'])
+ elif 'sve' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
elif 'v8.2-a' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
@@ -312,13 +315,17 @@ if env['os'] == 'android':
env.Append(LINKFLAGS = ['-pie', '-static-libstdc++', '-ldl'])
elif env['os'] == 'bare_metal':
env.Append(LINKFLAGS = ['-static'])
- env.Append(LINKFLAGS = ['-specs=rdimon.specs'])
env.Append(CXXFLAGS = ['-fPIC'])
+ if env['specs_file'] == "":
+ env.Append(LINKFLAGS = ['-specs=rdimon.specs'])
env.Append(CPPDEFINES = ['NO_MULTI_THREADING'])
env.Append(CPPDEFINES = ['BARE_METAL'])
if env['os'] == 'linux' and env['arch'] == 'armv7a':
env.Append(CXXFLAGS = [ '-Wno-psabi' ])
+if env['specs_file'] != "":
+ env.Append(LINKFLAGS = ['-specs='+env['specs_file']])
+
if env['opencl']:
if env['os'] in ['bare_metal'] or env['standalone']:
print("Cannot link OpenCL statically, which is required for bare metal / standalone builds")
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 94f73f8cdb..49ccbce5c3 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -1309,85 +1309,169 @@ v16.12 Binary preview release
scons 2.3 or above is required to build the library.
To see the build options available simply run ```scons -h```:
- debug: Debug (yes|no)
- default: False
- actual: False
+ debug: Debug (yes|no)
+ default: False
+ actual: False
+
+ asserts: Enable asserts (this flag is forced to 1 for debug=1) (yes|no)
+ default: False
+ actual: False
+
+ logging: Logging (this flag is forced to 1 for debug=1) (yes|no)
+ default: False
+ actual: False
+
+ arch: Target Architecture (armv7a|arm64-v8a|arm64-v8.2-a|arm64-v8.2-a-sve|arm64-v8.2-a-sve2|x86_32|x86_64|armv8a|armv8.2-a|armv8.2-a-sve|armv8.6-a|armv8.6-a-sve|armv8.6-a-sve2|x86)
+ default: armv7a
+ actual: armv7a
+
+ estate: Execution State (auto|32|64)
+ default: auto
+ actual: auto
+
+ os: Target OS (linux|android|tizen|bare_metal)
+ default: linux
+ actual: linux
+
+ build: Build type (native|cross_compile|embed_only)
+ default: cross_compile
+ actual: cross_compile
+
+ examples: Build example programs (yes|no)
+ default: True
+ actual: True
+
+ gemm_tuner: Build gemm_tuner programs (yes|no)
+ default: True
+ actual: True
+
+ Werror: Enable/disable the -Werror compilation flag (yes|no)
+ default: True
+ actual: True
+
+ standalone: Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute (yes|no)
+ default: False
+ actual: False
+
+ opencl: Enable OpenCL support (yes|no)
+ default: True
+ actual: True
+
+ neon: Enable Neon support (yes|no)
+ default: False
+ actual: False
+
+ gles_compute: Enable OpenGL ES Compute Shader support (yes|no)
+ default: False
+ actual: False
+
+ embed_kernels: Embed OpenCL kernels and OpenGL ES compute shaders in library binary (yes|no)
+ default: True
+ actual: True
+
+ set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no)
+ default: False
+ actual: False
+
+ tracing: Enable runtime tracing (yes|no)
+ default: False
+ actual: False
+
+ openmp: Enable OpenMP backend (yes|no)
+ default: False
+ actual: False
+
+ cppthreads: Enable C++11 threads backend (yes|no)
+ default: True
+ actual: True
+
+ build_dir: Specify sub-folder for the build ( /path/to/build_dir )
+ default: .
+ actual: .
+
+ install_dir: Specify sub-folder for the install ( /path/to/install_dir )
+ default:
+ actual:
+
+ exceptions: Enable/disable C++ exception support (yes|no)
+ default: True
+ actual: True
+
+ linker_script: Use an external linker script ( /path/to/linker_script )
+ default:
+ actual:
+
+ internal_only: Enable ARM internal only tests (yes|no)
+ default: False
+ actual: False
+
+ custom_options: Custom options that can be used to turn on/off features
+ (all|none|comma-separated list of names)
+ allowed names: disable_mmla_fp
+ default: none
+ actual:
- asserts: Enable asserts (this flag is forced to 1 for debug=1) (yes|no)
- default: False
- actual: False
+ data_type_support: Enable a list of data types to support
+ (all|none|comma-separated list of names)
+ allowed names: qasymm8 qasymm8_signed qsymm16 fp16 fp32
+ default: all
+ actual: qasymm8 qasymm8_signed qsymm16 fp16 fp32
- arch: Target Architecture (armv7a|arm64-v8a|arm64-v8.2-a|x86_32|x86_64)
- default: armv7a
- actual: armv7a
+ toolchain_prefix: Override the toolchain prefix
+ default:
+ actual:
- os: Target OS (linux|android|bare_metal)
- default: linux
- actual: linux
+ compiler_prefix: Override the compiler prefix
+ default:
+ actual:
- build: Build type (native|cross_compile|embed_only)
- default: cross_compile
- actual: cross_compile
+ extra_cxx_flags: Extra CXX flags to be appended to the build command
+ default:
+ actual:
- examples: Build example programs (yes|no)
- default: True
- actual: True
+ extra_link_flags: Extra LD flags to be appended to the build command
+ default:
+ actual:
- Werror: Enable/disable the -Werror compilation flag (yes|no)
- default: True
- actual: True
+ compiler_cache: Command to prefix to the C and C++ compiler (e.g ccache)
+ default:
+ actual:
- opencl: Enable OpenCL support (yes|no)
- default: True
- actual: True
+ specs_file: Specs file to use (e.g. rdimon.specs)
+ default:
+ actual:
- neon: Enable Neon support (yes|no)
- default: False
- actual: False
+ benchmark_examples: Build benchmark examples programs (yes|no)
+ default: True
+ actual: True
- gles_compute: Enable OpenGL ES Compute Shader support (yes|no)
- default: False
- actual: False
+ validate_examples: Build validate examples programs (yes|no)
+ default: True
+ actual: True
- embed_kernels: Embed OpenCL kernels and OpenGL ES compute shader in library binary (yes|no)
- default: True
- actual: True
+ reference_openmp: Build reference validation with openmp (yes|no)
+ default: True
+ actual: True
- set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no)
- default: False
- actual: False
+ validation_tests: Build validation test programs (yes|no)
+ default: True
+ actual: True
- openmp: Enable OpenMP backend (yes|no)
- default: False
- actual: False
+ benchmark_tests: Build benchmark test programs (yes|no)
+ default: True
+ actual: True
- cppthreads: Enable C++11 threads backend (yes|no)
- default: True
- actual: True
+ test_filter: Pattern to specify the tests' filenames to be compiled
+ default: *.cpp
+ actual: *.cpp
- build_dir: Specify sub-folder for the build ( /path/to/build_dir )
- default: .
- actual: .
+ pmu: Enable PMU counters (yes|no)
+ default: False
+ actual: False
- extra_cxx_flags: Extra CXX flags to be appended to the build command
- default:
- actual:
-
- pmu: Enable PMU counters (yes|no)
- default: False
- actual: False
-
- mali: Enable Mali hardware counters (yes|no)
- default: False
- actual: False
-
- validation_tests: Build validation test programs (yes|no)
- default: False
- actual: False
-
- benchmark_tests: Build benchmark test programs (yes|no)
- default: False
- actual: False
+ mali: Enable Mali hardware counters (yes|no)
+ default: False
+ actual: False
@b debug / @b asserts:
- With debug=1 asserts are enabled, and the library is built with symbols and no optimisations enabled.
@@ -1583,6 +1667,18 @@ For example:
Below is a list of the common parameters among the graph examples :
@snippet utils/CommonGraphOptions.h Common graph examples parameters
+@subsubsection S3_2_3_sve Build for SVE or SVE2
+
+In order to build for SVE or SVE2 you need a compiler that supports them. You can find more information at the following links:
+ -# GCC: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/sve-support
+ -# LLVM: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
+
+@note You then need to indicate the toolchain using the scons "toolchain_prefix" parameter.
+
+An example build command with SVE is:
+
+ scons arch=arm64-v8.2-a-sve os=linux build_dir=arm64 -j55 standalone=0 opencl=0 openmp=0 validation_tests=1 neon=1 cppthreads=1 toolchain_prefix=aarch64-none-linux-gnu-
+
@subsection S3_3_android Building for Android
For Android, the library was successfully built and tested using Google's standalone toolchains:
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
index 5770076d04..98004e98a5 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
@@ -33,7 +33,7 @@
namespace arm_gemm {
-void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int M, int N, int K, const int32_t *bias, Activation act, bool) {
+void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int M, int N, int K, const int32_t *, Activation, bool) {
const long loops_count = iceildiv(N, (int)get_vector_length<int32_t>()) - 1;
const long ldab = lda * sizeof(int8_t);
const long ldcb = ldc * sizeof(int32_t);
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
index b980d9b5c2..6a8553216b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
@@ -33,7 +33,7 @@
namespace arm_gemm {
-void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, int M, int N, int K, const uint32_t *bias, Activation act, bool) {
+void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, int M, int N, int K, const uint32_t *, Activation , bool) {
const long loops_count = iceildiv(N, (int)get_vector_length<uint32_t>()) - 1;
const long ldab = lda * sizeof(uint8_t);
const long ldcb = ldc * sizeof(uint32_t);