From 1c76c1ddcd1294ee8149bd74ecf6f62963408286 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 28 Aug 2020 13:25:31 +0100 Subject: COMPMID-3504: Add support for BOOL in NEON comparison operators Change-Id: I81b0c2482bc20b1ab5124ed6179bb94cbced7875 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3869 Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 41 +++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp') diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 014a564bf1..db4f5923bc 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -445,6 +445,21 @@ inline int elementwise_arithm_op_quantized_signed_broadcast_loop(int window_star return x; } +template +inline int elementwise_comp_op_8_loop(int window_start_x, int window_end_x, int window_step_x, + const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr) +{ + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto a = wrapper::vloadq(input1_ptr + x); + const auto b = wrapper::vloadq(input2_ptr + x); + const auto res = elementwise_comp_op(a, b); + wrapper::vstore(output_ptr + x, res); + } + return x; +} + template inline int elementwise_comp_op_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr) @@ -525,6 +540,19 @@ inline int elementwise_comp_op_quantized_signed_loop(int window_start_x, int win return x; } +template +inline int elementwise_comp_op_broadcast_8_loop(int window_start_x, int window_end_x, int window_step_x, + const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder) +{ + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto a = elementwise_comp_op_broadcast(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder); + wrapper::vstore(output_ptr + x, a); + } + return x; +} + template inline int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder) @@ -994,6 +1022,15 @@ void elementwise_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITe } } +template +void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) +{ + elementwise_op(in1, in2, out, window, + &elementwise_comp_op_scalar, + &elementwise_comp_op_broadcast_8_loop, + &elementwise_comp_op_8_loop); +} + template void elementwise_comp_op_16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { @@ -1101,6 +1138,7 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso { static std::map map_function = { + { "op_U8_U8_U8", &elementwise_comp_op_8 }, { "op_F32_F32_U8", &elementwise_comp_op_32 }, { "op_S16_S16_U8", &elementwise_comp_op_16 }, { "op_S32_S32_U8", &elementwise_comp_op_32 }, @@ -1122,7 +1160,6 @@ NEElementwiseOperationKernel::NEElementwiseOperationKernel() Status NEElementwiseOperationKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); @@ -1194,6 +1231,7 @@ void NEArithmeticOperationKernel::configure(ArithmeticOperation op, const ITenso Status NEArithmeticOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); // Validate in case of configured output if(output.total_size() > 0) { @@ -1285,6 +1323,7 @@ void NEComparisonOperationKernel::configure(ComparisonOperation op, const ITenso Status NEComparisonOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); // Validate in case of configured output if(output.total_size() > 0) { -- cgit v1.2.1