From 8dfb8820d5fe0f72a923eccc3bb73ee0b87d5511 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Thu, 13 Jul 2023 15:45:23 +0100 Subject: Enable S64 output in CLArgMinMax Resolves MLCE-1089 Change-Id: I8b385ef8a00ec5de60299bc7a359766ba5417e68 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9918 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Benchmark: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/common/arg_min_max.cl | 18 +++++++++--------- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 4 +++- src/runtime/CL/functions/CLArgMinMaxLayer.cpp | 5 +++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/core/CL/cl_kernels/common/arg_min_max.cl b/src/core/CL/cl_kernels/common/arg_min_max.cl index 438f46eb24..413fcf5333 100644 --- a/src/core/CL/cl_kernels/common/arg_min_max.cl +++ b/src/core/CL/cl_kernels/common/arg_min_max.cl @@ -85,9 +85,9 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_4(DATA_TYPE *min_max_val, VEC_DATA_TYPE(COND_DATA_TYPE, 2) idx_sel = VECTOR_PREDICATE_EQ(in.s01, in.s23); in.s01 = select(in.s23, in.s01, idx_sel); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } @@ -97,12 +97,12 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_8(DATA_TYPE *min_max_val, VEC_DATA_TYPE(COND_DATA_TYPE, 4) idx_sel = VECTOR_PREDICATE_EQ(in.s0123, in.s4567); in.s0123 = select(in.s4567, in.s0123, idx_sel); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, int4)); + res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, 
VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) )); idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } @@ -112,15 +112,15 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_16(DATA_TYPE *min_max_val VEC_DATA_TYPE(COND_DATA_TYPE, 8) idx_sel = VECTOR_PREDICATE_EQ(in.s01234567, in.s89abcdef); in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); - res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); + res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 8) )); idx_sel.s0123 = VECTOR_PREDICATE(in.s0123, in.s4567) || (in.s0123 == in.s4567 && CONVERT(((res.s0123 < res.s4567)), VEC_DATA_TYPE(COND_DATA_TYPE, 4))); in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); + res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) )); idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); + res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) )); idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE)); - 
res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); + res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT)); *min_max_val = SCALAR_SELECT_OP(in.s0, in.s1); *min_max_idx = res.s0; } diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index 41f885e4ba..2728958add 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -45,13 +45,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::S64); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); if(output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, DataType::U64); } return Status{}; diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp index ea6311afdb..b30d739025 100644 --- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp +++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp @@ -96,10 +96,15 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN) ? 
DataType::S32 : output->info()->data_type(); auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true)); + TensorShape not_reshaped_output_shape{ input->info()->tensor_shape() }; + not_reshaped_output_shape.set(axis, 1); + auto_init_if_empty(*_not_reshaped_output.info(), input->info()->clone()->set_tensor_shape(not_reshaped_output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true)); + _arg_min_max_kernel = std::make_unique<CLArgMinMaxLayerKernel>(); _arg_min_max_kernel->configure(compile_context, input, &_not_reshaped_output, axis, op); _memory_group.manage(&_not_reshaped_output); + _reshape.configure(compile_context, &_not_reshaped_output, output); _not_reshaped_output.allocator()->allocate(); } -- cgit v1.2.1