diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-22 16:42:03 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2021-06-07 13:21:17 +0000 |
commit | bdcdc39d89b6a6556f5c0483af5379f75eae0c55 (patch) | |
tree | 454cd50afa81da3ca3382701619fef023911e3f7 /src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h | |
parent | 5a643320b79f15a5d09b5366c4744579cf71e303 (diff) | |
download | ComputeLibrary-bdcdc39d89b6a6556f5c0483af5379f75eae0c55.tar.gz |
Enable fat binary support
Changes our build system to allow building both Neon(TM) and SVE
kernels and packaging them in the same binary. This will allow
runtime selection of the underlying architecture.
Adds a new build option, fat_binary, for enabling this feature.
Change-Id: I8e8386149773ce28e071a2fb7ddd8c8ae0f28a4a
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5704
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h')
-rw-r--r-- | src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h | 19 |
1 file changed, 8 insertions, 11 deletions
diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h index b6342c727c..6c5524e284 100644 --- a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h +++ b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h @@ -26,14 +26,13 @@ #if defined(__ARM_FEATURE_SVE2) +#include "src/core/NEON/wrapper/svtraits.h" #include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h" namespace arm_compute { namespace cpu { -namespace sve -{ using namespace arm_compute::wrapper; template <typename InputScalarType, typename OutputScalarType, typename OperatorType> @@ -176,7 +175,7 @@ inline void comparison_op_quantized_loop(svbool_t pg, const QuantizedLoopArgumen const auto in1 = load_quantized(args.input1_ptr, pg, args.in1_offset, args.in1_scale); const auto in2 = load_quantized(args.input2_ptr, pg, args.in2_offset, args.in2_scale); - using OutputVectorType = typename sve_vector<OutputScalarType>::type; + using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type; const auto result = svcreate4( elementwise_comparison_op<svfloat32_t, OutputVectorType>(pg, svget4(in1, 0), svget4(in2, 0), args.op), @@ -200,7 +199,7 @@ inline void comparison_op_broadcast_quantized_loop(svbool_t pg, const BroadcastQ const auto &af = args.reorder ? in2 : in1; const auto &bf = args.reorder ? 
in1 : in2; - using OutputVectorType = typename sve_vector<OutputScalarType>::type; + using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type; const auto result = svcreate4( elementwise_comparison_op<svfloat32_t, OutputVectorType>(pg, svget4(af, 0), svget4(bf, 0), args.op), @@ -221,8 +220,8 @@ template <typename InputScalarType, typename OutputScalarType, typename Operator using BroadcastQuantizedLoopFuncType = void (*)(svbool_t, const BroadcastQuantizedLoopArguments<InputScalarType, OutputScalarType, OperatorType> &); template <typename InputVectorType, typename OutputVectorType, typename OperatorType, - typename InputScalarType = typename sve_scalar<InputVectorType>::type, - typename OutputScalarType = typename sve_scalar<OutputVectorType>::type> + typename InputScalarType = typename wrapper::sve_scalar<InputVectorType>::type, + typename OutputScalarType = typename wrapper::sve_scalar<OutputVectorType>::type> void elementwise_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, OperatorType op, LoopQuantizedFuncType<InputScalarType, OutputScalarType, OperatorType> func, @@ -344,7 +343,7 @@ void elementwise_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *o template <ArithmeticOperation op, typename ScalarType> void elementwise_arithmetic_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { - using VectorType = typename sve_vector<ScalarType>::type; + using VectorType = typename wrapper::traits::sve_vector<ScalarType>::type; elementwise_quantized_op<VectorType, VectorType, ArithmeticOperation>(in1, in2, out, window, op, &arithmetic_op_quantized_loop<ScalarType, ScalarType>, &arithmetic_op_broadcast_quantized_loop<ScalarType, ScalarType>); @@ -354,14 +353,12 @@ template <ComparisonOperation op, typename InputScalarType, typename OutputScala void elementwise_comparison_quantized_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window 
&window) { static_assert(sizeof(InputScalarType) >= sizeof(OutputScalarType), "input data type's width should be equal to or greater than output data type's width"); - using InputVectorType = typename sve_vector<InputScalarType>::type; - using OutputVectorType = typename sve_vector<OutputScalarType>::type; + using InputVectorType = typename wrapper::traits::sve_vector<InputScalarType>::type; + using OutputVectorType = typename wrapper::traits::sve_vector<OutputScalarType>::type; elementwise_quantized_op<InputVectorType, OutputVectorType, ComparisonOperation>(in1, in2, out, window, op, &comparison_op_quantized_loop<InputScalarType, OutputScalarType>, &comparison_op_broadcast_quantized_loop<InputScalarType, OutputScalarType>); } - -} // namespace sve } // namespace cpu } // namespace arm_compute |