From 8dfb8820d5fe0f72a923eccc3bb73ee0b87d5511 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Thu, 13 Jul 2023 15:45:23 +0100 Subject: Enable S64 output in CLArgMinMax Resolves MLCE-1089 Change-Id: I8b385ef8a00ec5de60299bc7a359766ba5417e68 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9918 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Benchmark: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/common/arg_min_max.cl | 18 +++++++++--------- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 4 +++- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/common/arg_min_max.cl b/src/core/CL/cl_kernels/common/arg_min_max.cl index 438f46eb24..413fcf5333 100644 --- a/src/core/CL/cl_kernels/common/arg_min_max.cl +++ b/src/core/CL/cl_kernels/common/arg_min_max.cl @@ -85,9 +85,9 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_4(DATA_TYPE *min_max_val, VEC_DATA_TYPE(COND_DATA_TYPE, 2) idx_sel = VECTOR_PREDICATE_EQ(in.s01, in.s23); in.s01 = select(in.s23, in.s01, idx_sel); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } @@ -97,12 +97,12 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_8(DATA_TYPE *min_max_val, VEC_DATA_TYPE(COND_DATA_TYPE, 4) idx_sel = VECTOR_PREDICATE_EQ(in.s0123, in.s4567); in.s0123 = select(in.s4567, in.s0123, idx_sel); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, int4)); + res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) )); idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } @@ -112,15 +112,15 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_16(DATA_TYPE *min_max_val VEC_DATA_TYPE(COND_DATA_TYPE, 8) idx_sel = VECTOR_PREDICATE_EQ(in.s01234567, in.s89abcdef); in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); - res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); + res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 8) )); idx_sel.s0123 = VECTOR_PREDICATE(in.s0123, in.s4567) || (in.s0123 == in.s4567 && CONVERT(((res.s0123 < res.s4567)), VEC_DATA_TYPE(COND_DATA_TYPE, 4))); in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); + res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) )); idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index 41f885e4ba..2728958add 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -45,13 +45,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::S64); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); if(output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, DataType::U64); } return Status{}; -- cgit v1.2.1