aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2020-08-28 13:25:31 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-08-28 14:51:30 +0000
commit1c76c1ddcd1294ee8149bd74ecf6f62963408286 (patch)
treed6633cf8b331d0edeed9247f8d805d8bcf2019b9 /src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
parent5bdde8509542e942e908e9d508dd39c73194abfb (diff)
downloadComputeLibrary-1c76c1ddcd1294ee8149bd74ecf6f62963408286.tar.gz
COMPMID-3504: Add support for BOOL in NEON comparison operators
Change-Id: I81b0c2482bc20b1ab5124ed6179bb94cbced7875 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3869 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEElementwiseOperationKernel.cpp41
1 files changed, 40 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index 014a564bf1..db4f5923bc 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -446,6 +446,21 @@ inline int elementwise_arithm_op_quantized_signed_broadcast_loop(int window_star
}
template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
+inline int elementwise_comp_op_8_loop(int window_start_x, int window_end_x, int window_step_x,
+ const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
+{
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto a = wrapper::vloadq(input1_ptr + x);
+ const auto b = wrapper::vloadq(input2_ptr + x);
+ const auto res = elementwise_comp_op<op, InputVectorType, uint8x16_t>(a, b);
+ wrapper::vstore(output_ptr + x, res);
+ }
+ return x;
+}
+
+template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
inline int elementwise_comp_op_16_loop(int window_start_x, int window_end_x, int window_step_x,
const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
{
@@ -526,6 +541,19 @@ inline int elementwise_comp_op_quantized_signed_loop(int window_start_x, int win
}
template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
+inline int elementwise_comp_op_broadcast_8_loop(int window_start_x, int window_end_x, int window_step_x,
+ const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
+{
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint8x16_t>(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder);
+ wrapper::vstore(output_ptr + x, a);
+ }
+ return x;
+}
+
+template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
inline int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x,
const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
{
@@ -995,6 +1023,15 @@ void elementwise_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITe
}
template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
+void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+ elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window,
+ &elementwise_comp_op_scalar<op, InputScalarType>,
+ &elementwise_comp_op_broadcast_8_loop<op, InputScalarType, InputVectorType>,
+ &elementwise_comp_op_8_loop<op, InputScalarType, InputVectorType>);
+}
+
+template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
void elementwise_comp_op_16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window,
@@ -1101,6 +1138,7 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso
{
static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function =
{
+ { "op_U8_U8_U8", &elementwise_comp_op_8<op, uint8_t, uint8x16_t> },
{ "op_F32_F32_U8", &elementwise_comp_op_32<op, float, float32x4_t> },
{ "op_S16_S16_U8", &elementwise_comp_op_16<op, int16_t, int16x8_t> },
{ "op_S32_S32_U8", &elementwise_comp_op_32<op, int32_t, int32x4_t> },
@@ -1122,7 +1160,6 @@ NEElementwiseOperationKernel::NEElementwiseOperationKernel()
Status NEElementwiseOperationKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
@@ -1194,6 +1231,7 @@ void NEArithmeticOperationKernel::configure(ArithmeticOperation op, const ITenso
Status NEArithmeticOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
// Validate in case of configured output
if(output.total_size() > 0)
{
@@ -1285,6 +1323,7 @@ void NEComparisonOperationKernel::configure(ComparisonOperation op, const ITenso
Status NEComparisonOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
// Validate in case of configured output
if(output.total_size() > 0)
{