diff options
4 files changed, 174 insertions, 71 deletions
diff --git a/SConstruct b/SConstruct index 395fb5e59d..e9ba496afb 100644 --- a/SConstruct +++ b/SConstruct @@ -43,8 +43,8 @@ vars.AddVariables( BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False), BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False), EnumVariable("arch", "Target Architecture", "armv7a", - allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64", - "armv8a", "armv8.2-a", "armv8.2-a-sve", "armv8.6-a", "armv8.6-a-sve", "x86")), + allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "arm64-v8.2-a-sve2", "x86_32", "x86_64", + "armv8a", "armv8.2-a", "armv8.2-a-sve", "armv8.6-a", "armv8.6-a-sve", "armv8.6-a-sve2", "x86")), EnumVariable("estate", "Execution State", "auto", allowed_values=("auto", "32", "64")), EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "tizen", "bare_metal")), EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile", "embed_only")), @@ -72,7 +72,8 @@ vars.AddVariables( ("compiler_prefix", "Override the compiler prefix", ""), ("extra_cxx_flags", "Extra CXX flags to be appended to the build command", ""), ("extra_link_flags", "Extra LD flags to be appended to the build command", ""), - ("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", "") + ("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", ""), + ("specs_file", "Specs file to use (e.g. 
rdimon.specs)", "") ) env = Environment(platform="posix", variables=vars, ENV = os.environ) @@ -202,7 +203,9 @@ if 'v7a' in env['arch']: else: env.Append(CXXFLAGS = ['-mfloat-abi=hard']) elif 'v8' in env['arch']: - if 'sve' in env['arch']: + if 'sve2' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod']) + elif 'sve' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod']) elif 'v8.2-a' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined @@ -312,13 +315,17 @@ if env['os'] == 'android': env.Append(LINKFLAGS = ['-pie', '-static-libstdc++', '-ldl']) elif env['os'] == 'bare_metal': env.Append(LINKFLAGS = ['-static']) - env.Append(LINKFLAGS = ['-specs=rdimon.specs']) env.Append(CXXFLAGS = ['-fPIC']) + if env['specs_file'] == "": + env.Append(LINKFLAGS = ['-specs=rdimon.specs']) env.Append(CPPDEFINES = ['NO_MULTI_THREADING']) env.Append(CPPDEFINES = ['BARE_METAL']) if env['os'] == 'linux' and env['arch'] == 'armv7a': env.Append(CXXFLAGS = [ '-Wno-psabi' ]) +if env['specs_file'] != "": + env.Append(LINKFLAGS = ['-specs='+env['specs_file']]) + if env['opencl']: if env['os'] in ['bare_metal'] or env['standalone']: print("Cannot link OpenCL statically, which is required for bare metal / standalone builds") diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 94f73f8cdb..49ccbce5c3 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -1309,85 +1309,169 @@ v16.12 Binary preview release scons 2.3 or above is required to build the library. 
To see the build options available simply run ```scons -h```: - debug: Debug (yes|no) - default: False - actual: False + debug: Debug (yes|no) + default: False + actual: False + + asserts: Enable asserts (this flag is forced to 1 for debug=1) (yes|no) + default: False + actual: False + + logging: Logging (this flag is forced to 1 for debug=1) (yes|no) + default: False + actual: False + + arch: Target Architecture (armv7a|arm64-v8a|arm64-v8.2-a|arm64-v8.2-a-sve|arm64-v8.2-a-sve2|x86_32|x86_64|armv8a|armv8.2-a|armv8.2-a-sve|armv8.6-a|armv8.6-a-sve|armv8.6-a-sve2|x86) + default: armv7a + actual: armv7a + + estate: Execution State (auto|32|64) + default: auto + actual: auto + + os: Target OS (linux|android|tizen|bare_metal) + default: linux + actual: linux + + build: Build type (native|cross_compile|embed_only) + default: cross_compile + actual: cross_compile + + examples: Build example programs (yes|no) + default: True + actual: True + + gemm_tuner: Build gemm_tuner programs (yes|no) + default: True + actual: True + + Werror: Enable/disable the -Werror compilation flag (yes|no) + default: True + actual: True + + standalone: Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute (yes|no) + default: False + actual: False + + opencl: Enable OpenCL support (yes|no) + default: True + actual: True + + neon: Enable Neon support (yes|no) + default: False + actual: False + + gles_compute: Enable OpenGL ES Compute Shader support (yes|no) + default: False + actual: False + + embed_kernels: Embed OpenCL kernels and OpenGL ES compute shaders in library binary (yes|no) + default: True + actual: True + + set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no) + default: False + actual: False + + tracing: Enable runtime tracing (yes|no) + default: False + actual: False + + openmp: Enable OpenMP backend (yes|no) + default: False + actual: False + + cppthreads: Enable C++11 threads backend (yes|no) + 
default: True + actual: True + + build_dir: Specify sub-folder for the build ( /path/to/build_dir ) + default: . + actual: . + + install_dir: Specify sub-folder for the install ( /path/to/install_dir ) + default: + actual: + + exceptions: Enable/disable C++ exception support (yes|no) + default: True + actual: True + + linker_script: Use an external linker script ( /path/to/linker_script ) + default: + actual: + + internal_only: Enable ARM internal only tests (yes|no) + default: False + actual: False + + custom_options: Custom options that can be used to turn on/off features + (all|none|comma-separated list of names) + allowed names: disable_mmla_fp + default: none + actual: - asserts: Enable asserts (this flag is forced to 1 for debug=1) (yes|no) - default: False - actual: False + data_type_support: Enable a list of data types to support + (all|none|comma-separated list of names) + allowed names: qasymm8 qasymm8_signed qsymm16 fp16 fp32 + default: all + actual: qasymm8 qasymm8_signed qsymm16 fp16 fp32 - arch: Target Architecture (armv7a|arm64-v8a|arm64-v8.2-a|x86_32|x86_64) - default: armv7a - actual: armv7a + toolchain_prefix: Override the toolchain prefix + default: + actual: - os: Target OS (linux|android|bare_metal) - default: linux - actual: linux + compiler_prefix: Override the compiler prefix + default: + actual: - build: Build type (native|cross_compile|embed_only) - default: cross_compile - actual: cross_compile + extra_cxx_flags: Extra CXX flags to be appended to the build command + default: + actual: - examples: Build example programs (yes|no) - default: True - actual: True + extra_link_flags: Extra LD flags to be appended to the build command + default: + actual: - Werror: Enable/disable the -Werror compilation flag (yes|no) - default: True - actual: True + compiler_cache: Command to prefix to the C and C++ compiler (e.g ccache) + default: + actual: - opencl: Enable OpenCL support (yes|no) - default: True - actual: True + specs_file: Specs file to use + 
default: rdimon.specs + actual: rdimon.specs - neon: Enable Neon support (yes|no) - default: False - actual: False + benchmark_examples: Build benchmark examples programs (yes|no) + default: True + actual: True - gles_compute: Enable OpenGL ES Compute Shader support (yes|no) - default: False - actual: False + validate_examples: Build validate examples programs (yes|no) + default: True + actual: True - embed_kernels: Embed OpenCL kernels and OpenGL ES compute shader in library binary (yes|no) - default: True - actual: True + reference_openmp: Build reference validation with openmp (yes|no) + default: True + actual: True - set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no) - default: False - actual: False + validation_tests: Build validation test programs (yes|no) + default: True + actual: True - openmp: Enable OpenMP backend (yes|no) - default: False - actual: False + benchmark_tests: Build benchmark test programs (yes|no) + default: True + actual: True - cppthreads: Enable C++11 threads backend (yes|no) - default: True - actual: True + test_filter: Pattern to specify the tests' filenames to be compiled + default: *.cpp + actual: *.cpp - build_dir: Specify sub-folder for the build ( /path/to/build_dir ) - default: . - actual: . + pmu: Enable PMU counters (yes|no) + default: False + actual: False - extra_cxx_flags: Extra CXX flags to be appended to the build command - default: - actual: - - pmu: Enable PMU counters (yes|no) - default: False - actual: False - - mali: Enable Mali hardware counters (yes|no) - default: False - actual: False - - validation_tests: Build validation test programs (yes|no) - default: False - actual: False - - benchmark_tests: Build benchmark test programs (yes|no) - default: False - actual: False + mali: Enable Mali hardware counters (yes|no) + default: False + actual: False @b debug / @b asserts: - With debug=1 asserts are enabled, and the library is built with symbols and no optimisations enabled. 
@@ -1583,6 +1667,18 @@ For example: Below is a list of the common parameters among the graph examples : @snippet utils/CommonGraphOptions.h Common graph examples parameters +@subsubsection S3_2_3_sve Build for SVE or SVE2 + +In order to build for SVE or SVE2 you need a compiler that supports them. You can find more information at the following links: + -# GCC: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/sve-support + -# LLVM: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support + +@note You then need to indicate the toolchain using the scons "toolchain_prefix" parameter. + +An example build command with SVE is: + + scons arch=arm64-v8.2-a-sve os=linux build_dir=arm64 -j55 standalone=0 opencl=0 openmp=0 validation_tests=1 neon=1 cppthreads=1 toolchain_prefix=aarch64-none-linux-gnu- + @subsection S3_3_android Building for Android For Android, the library was successfully built and tested using Google's standalone toolchains: diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp index 5770076d04..98004e98a5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp @@ -33,7 +33,7 @@ namespace arm_gemm { -void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int M, int N, int K, const int32_t *bias, Activation act, bool) { +void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int M, int N, int K, const int32_t *, Activation, bool) { const long loops_count = iceildiv(N, (int)get_vector_length<int32_t>()) - 1; const long ldab = lda * sizeof(int8_t); const long ldcb = ldc * sizeof(int32_t); diff --git
a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp index b980d9b5c2..6a8553216b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp @@ -33,7 +33,7 @@ namespace arm_gemm { -void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, int M, int N, int K, const uint32_t *bias, Activation act, bool) { +void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, int M, int N, int K, const uint32_t *, Activation , bool) { const long loops_count = iceildiv(N, (int)get_vector_length<uint32_t>()) - 1; const long ldab = lda * sizeof(uint8_t); const long ldcb = ldc * sizeof(uint32_t); |