Enable fat binary support

Changes our build system to allow building both Neon(TM) and SVE kernels and packaging them in the same binary. This will allow runtime selection of the underlying architecture. Adds a new build option, fat_binary, for enabling this feature. Change-Id: I8e8386149773ce28e071a2fb7ddd8c8ae0f28a4a Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5704 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2021-04-22 16:42:03 +0100
committer: Michalis Spyrou <michalis.spyrou@arm.com> 2021-06-07 13:21:17 +0000
commit: bdcdc39d89b6a6556f5c0483af5379f75eae0c55 (patch)
tree: 454cd50afa81da3ca3382701619fef023911e3f7 /src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
parent: 5a643320b79f15a5d09b5366c4744579cf71e303 (diff)
download: ComputeLibrary-bdcdc39d89b6a6556f5c0483af5379f75eae0c55.tar.gz
1 file changed, 8 insertions, 11 deletions
diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
index b6342c727c..6c5524e284 100644
--- a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
+++ b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
@@ -26,14 +26,13 @@
 
 #if defined(__ARM_FEATURE_SVE2)
 
+#include "src/core/NEON/wrapper/svtraits.h"
 #include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-namespace sve
-{
 using namespace arm_compute::wrapper;
 
 template <typename InputScalarType, typename OutputScalarType, typename OperatorType>
@@ -176,7 +175,7 @@ inline void comparison_op_quantized_loop(svbool_t pg, const QuantizedLoopArgumen
     const auto in1 = load_quantized(args.input1_ptr, pg, args.in1_offset, args.in1_scale);
     const auto in2 = load_quantized(args.input2_ptr, pg, args.in2_offset, args.in2_scale);
 
-    using OutputVectorType = typename sve_vector<OutputScalarType>::type;
+    using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type;
 
     const auto result = svcreate4(
                             elementwise_comparison_op<svfloat32_t, OutputVectorType>(pg, svget4(in1, 0), svget4(in2, 0), args.op),
@@ -200,7 +199,7 @@ inline void comparison_op_broadcast_quantized_loop(svbool_t pg, const BroadcastQ
     const auto &af = args.reorder ? in2 : in1;
     const auto &bf = args.reorder ? in1 : in2;
 
-    using OutputVectorType = typename sve_vector<OutputScalarType>::type;
+    using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type;
 
     const auto result = svcreate4(
                             elementwise_comparison_op<svfloat32_t, OutputVectorType>(pg, svget4(af, 0), svget4(bf, 0), args.op),
@@ -221,8 +220,8 @@ template <typename InputScalarType, typename OutputScalarType, typename Operator
 using BroadcastQuantizedLoopFuncType = void (*)(svbool_t, const BroadcastQuantizedLoopArguments<InputScalarType, OutputScalarType, OperatorType> &);
 
 template <typename InputVectorType, typename OutputVectorType, typename OperatorType,
-          typename InputScalarType  = typename sve_scalar<InputVectorType>::type,
-          typename OutputScalarType = typename sve_scalar<OutputVectorType>::type>
+          typename InputScalarType  = typename wrapper::sve_scalar<InputVectorType>::type,
+          typename OutputScalarType = typename wrapper::sve_scalar<OutputVectorType>::type>
 void elementwise_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
                               OperatorType op,
                               LoopQuantizedFuncType<InputScalarType, OutputScalarType, OperatorType>          func,
@@ -344,7 +343,7 @@ void elementwise_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *o
 template <ArithmeticOperation op, typename ScalarType>
 void elementwise_arithmetic_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
-    using VectorType = typename sve_vector<ScalarType>::type;
+    using VectorType = typename wrapper::traits::sve_vector<ScalarType>::type;
     elementwise_quantized_op<VectorType, VectorType, ArithmeticOperation>(in1, in2, out, window, op,
                                                                           &arithmetic_op_quantized_loop<ScalarType, ScalarType>,
                                                                           &arithmetic_op_broadcast_quantized_loop<ScalarType, ScalarType>);
@@ -354,14 +353,12 @@ template <ComparisonOperation op, typename InputScalarType, typename OutputScala
 void elementwise_comparison_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
     static_assert(sizeof(InputScalarType) >= sizeof(OutputScalarType), "input data type's width should be equal to or greater than output data type's width");
-    using InputVectorType  = typename sve_vector<InputScalarType>::type;
-    using OutputVectorType = typename sve_vector<OutputScalarType>::type;
+    using InputVectorType  = typename wrapper::traits::sve_vector<InputScalarType>::type;
+    using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type;
     elementwise_quantized_op<InputVectorType, OutputVectorType, ComparisonOperation>(in1, in2, out, window, op,
                                                                                      &comparison_op_quantized_loop<InputScalarType, OutputScalarType>,
                                                                                      &comparison_op_broadcast_quantized_loop<InputScalarType, OutputScalarType>);
 }
-
-} // namespace sve
 } // namespace cpu
 } // namespace arm_compute
author	Georgios Pinitas <georgios.pinitas@arm.com>	2021-04-22 16:42:03 +0100
committer	Michalis Spyrou <michalis.spyrou@arm.com>	2021-06-07 13:21:17 +0000
commit	bdcdc39d89b6a6556f5c0483af5379f75eae0c55 (patch)
tree	454cd50afa81da3ca3382701619fef023911e3f7 /src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
parent	5a643320b79f15a5d09b5366c4744579cf71e303 (diff)
download	ComputeLibrary-bdcdc39d89b6a6556f5c0483af5379f75eae0c55.tar.gz