aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-12-09 18:35:57 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-12-11 11:16:42 +0000
commit f2cdce30ca782cac7caebc43c7e67caf677b7358 (patch)
tree ab38fbe929b537a783344d1f5f1803574101c222
parent 1d8977771274b713c1e237e7dcaea7b61a84a86a (diff)
download ComputeLibrary-f2cdce30ca782cac7caebc43c7e67caf677b7358.tar.gz
COMPMID-2841: Enable aarch32 builds
Decouples the execution state from the architecture. The architecture can now be set independently (armv7a, armv8a, etc.), and the execution state is selected with the `estate` flag, which accepts the options auto, 32 and 64. Change-Id: Ie7f757b3565495a39c7e20fb350a72fd9c5a2a4f Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/2438 Reviewed-by: Pablo Marquez <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- SConscript 5
-rw-r--r-- SConstruct 90
-rw-r--r-- arm_compute/core/NEON/NEMath.inl 4
-rw-r--r-- docs/Doxyfile 4
-rw-r--r-- src/core/NEON/kernels/NESelectKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp 4
6 files changed, 66 insertions, 45 deletions
diff --git a/SConscript b/SConscript
index ed22f6eefe..0b7729cced 100644
--- a/SConscript
+++ b/SConscript
@@ -216,11 +216,10 @@ if env['neon']:
graph_files += Glob('src/graph/backends/NEON/*.cpp')
- if env['arch'] == "armv7a":
+ if env['estate'] == '32':
core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a32_*/*.cpp')
-
- if "arm64-v8" in env['arch']:
+ if env['estate'] == '64':
core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a64_*/*.cpp')
if "sve" in env['arch']:
core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp')
diff --git a/SConstruct b/SConstruct
index e63d33e9fd..216920f059 100644
--- a/SConstruct
+++ b/SConstruct
@@ -40,7 +40,9 @@ vars.AddVariables(
BoolVariable("debug", "Debug", False),
BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False),
BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False),
- EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64")),
+ EnumVariable("arch", "Target Architecture", "armv7a",
+ allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64", "armv8a", "armv8.2-a", "armv8.2-a-sve", "x86")),
+ EnumVariable("estate", "Execution State", "auto", allowed_values=("auto", "32", "64")),
EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")),
EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile", "embed_only")),
BoolVariable("examples", "Build example programs", True),
@@ -164,48 +166,66 @@ if env['openmp']:
env.Append(CXXFLAGS = ['-fopenmp'])
env.Append(LINKFLAGS = ['-fopenmp'])
+# Validate and define state
+if env['estate'] == 'auto':
+ if 'v7a' in env['arch']:
+ env['estate'] = '32'
+ else:
+ env['estate'] = '64'
+
+# Map legacy arch
+if 'arm64' in env['arch']:
+ env['estate'] = '64'
+
+if 'v7a' in env['estate'] and env['estate'] == '64':
+ print("ERROR: armv7a architecture has only 32-bit execution state")
+ Exit(1)
+
# Add architecture specific flags
prefix = ""
-if env['arch'] == 'armv7a':
+if 'v7a' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])
-
- if env['os'] == 'linux':
- prefix = "arm-linux-gnueabihf-"
- env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
- elif env['os'] == 'bare_metal':
- prefix = "arm-eabi-"
- env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
- elif env['os'] == 'android':
- prefix = "arm-linux-androideabi-"
+ if env['os'] == 'android':
env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
-elif env['arch'] == 'arm64-v8a':
+ else:
+ env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
+elif 'v8a' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8-a'])
- env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8A'])
- if env['os'] == 'linux':
- prefix = "aarch64-linux-gnu-"
- elif env['os'] == 'bare_metal':
- prefix = "aarch64-elf-"
- elif env['os'] == 'android':
- prefix = "aarch64-linux-android-"
-elif 'arm64-v8.2-a' in env['arch']:
- if env['arch'] == 'arm64-v8.2-a-sve':
+ if env['estate'] == '32':
+ env.Append(CXXFLAGS = ['-mfpu=neon-fp-armv8'])
+elif 'v8.2-a' in env['arch']:
+ if env['estate'] == '32':
+ env.Append(CXXFLAGS = ['-mfpu=neon-fp-armv8'])
+ if 'sve' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
else:
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
- if env['os'] == 'linux':
- prefix = "aarch64-linux-gnu-"
- elif env['os'] == 'bare_metal':
- prefix = "aarch64-elf-"
- elif env['os'] == 'android':
- prefix = "aarch64-linux-android-"
- env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
-elif env['arch'] == 'x86_32':
- env.Append(CCFLAGS = ['-m32'])
- env.Append(LINKFLAGS = ['-m32'])
-elif env['arch'] == 'x86_64':
- env.Append(CXXFLAGS = ['-fPIC'])
- env.Append(CCFLAGS = ['-m64'])
- env.Append(LINKFLAGS = ['-m64'])
+elif 'x86' in env['arch']:
+ if env['estate'] == '32':
+ env.Append(CCFLAGS = ['-m32'])
+ env.Append(LINKFLAGS = ['-m32'])
+ else:
+ env.Append(CXXFLAGS = ['-fPIC'])
+ env.Append(CCFLAGS = ['-m64'])
+ env.Append(LINKFLAGS = ['-m64'])
+
+# Define toolchain
+prefix = ""
+if 'x86' not in env['arch']:
+ if env['estate'] == '32':
+ if env['os'] == 'linux':
+ prefix = "arm-linux-gnueabihf-" if 'v7' in env['arch'] else "armv8l-linux-gnueabihf-"
+ elif env['os'] == 'bare_metal':
+ prefix = "arm-eabi-"
+ elif env['os'] == 'android':
+ prefix = "arm-linux-androideabi-"
+ elif env['estate'] == '64' and 'v8' in env['arch']:
+ if env['os'] == 'linux':
+ prefix = "aarch64-linux-gnu-"
+ elif env['os'] == 'bare_metal':
+ prefix = "aarch64-elf-"
+ elif env['os'] == 'android':
+ prefix = "aarch64-linux-android-"
if env['build'] == 'native':
prefix = ""
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index a3601f6a25..179f1b6299 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -434,7 +434,7 @@ inline float16x8_t vexpq_f16(float16x8_t x)
const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
- const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vexpq_f32(x_low)), vexpq_f32(x_high));
+ const float16x8_t res = vcombine_f16(vcvt_f16_f32(vexpq_f32(x_low)), vcvt_f16_f32(vexpq_f32(x_high)));
return res;
}
@@ -444,7 +444,7 @@ inline float16x8_t vlogq_f16(float16x8_t x)
const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
- const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vlogq_f32(x_low)), vlogq_f32(x_high));
+ const float16x8_t res = vcombine_f16(vcvt_f16_f32(vlogq_f32(x_low)), vcvt_f16_f32(vlogq_f32(x_high)));
return res;
}
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 835aa00a05..7f5aa5bdbe 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -2087,9 +2087,7 @@ PREDEFINED = DOXYGEN_SKIP_THIS \
LOCATE_MIN \
LOCATE_MAX \
HAS_BIAS \
- POOL_AVG \
- ARM_COMPUTE_AARCH64_V8_2 \
- ARM_COMPUTE_AARCH64_V8A
+ POOL_AVG
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp
index 72afe4f054..191d182002 100644
--- a/src/core/NEON/kernels/NESelectKernel.cpp
+++ b/src/core/NEON/kernels/NESelectKernel.cpp
@@ -197,7 +197,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
map_function["op_F16"] = &select_op_16<float16_t, uint16x8_t>;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
else
{
@@ -213,7 +213,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
map_function["op_F16"] = &select_op_not_same_rank<float16_t>;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
auto it = map_function.find(function_to_call);
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
index 9409646818..16bdbb5986 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
@@ -83,12 +83,16 @@ void MergeResults<8, 6, false>(float *out, const float *in, const int ldout, con
switch ((y + 5) - ymax) {
case 4:
outptr1 = dummyres;
+ /* fall through */
case 3:
outptr2 = dummyres;
+ /* fall through */
case 2:
outptr3 = dummyres;
+ /* fall through */
case 1:
outptr4 = dummyres;
+ /* fall through */
case 0:
outptr5 = dummyres;
break;