about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pablo Marquez Tello <pablo.tello@arm.com> 2023-07-13 15:45:23 +0100
committer Pablo Marquez Tello <pablo.tello@arm.com> 2023-07-21 08:59:22 +0000
commit 8dfb8820d5fe0f72a923eccc3bb73ee0b87d5511 (patch)
tree a6709f9f77d39d9db4e795b2deb5c7d16ea0ca12
parent 2e0714d4bb6795e34bcdcdaf812e9863dea2f43f (diff)
download ComputeLibrary-8dfb8820d5fe0f72a923eccc3bb73ee0b87d5511.tar.gz
Enable S64 output in CLArgMinMax
Resolves MLCE-1089

Change-Id: I8b385ef8a00ec5de60299bc7a359766ba5417e68
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9918
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/common/arg_min_max.cl 18
-rw-r--r-- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp 4
-rw-r--r-- src/runtime/CL/functions/CLArgMinMaxLayer.cpp 5
3 files changed, 17 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/common/arg_min_max.cl b/src/core/CL/cl_kernels/common/arg_min_max.cl
index 438f46eb24..413fcf5333 100644
--- a/src/core/CL/cl_kernels/common/arg_min_max.cl
+++ b/src/core/CL/cl_kernels/common/arg_min_max.cl
@@ -85,9 +85,9 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_4(DATA_TYPE *min_max_val,
VEC_DATA_TYPE(COND_DATA_TYPE, 2)
idx_sel = VECTOR_PREDICATE_EQ(in.s01, in.s23);
in.s01 = select(in.s23, in.s01, idx_sel);
- res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, int2));
+ res.s01 = select(res.s23, res.s01, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) ));
idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE));
- res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int));
+ res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT));
*min_max_val = SCALAR_SELECT_OP(in.s0, in.s1);
*min_max_idx = res.s0;
}
@@ -97,12 +97,12 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_8(DATA_TYPE *min_max_val,
VEC_DATA_TYPE(COND_DATA_TYPE, 4)
idx_sel = VECTOR_PREDICATE_EQ(in.s0123, in.s4567);
in.s0123 = select(in.s4567, in.s0123, idx_sel);
- res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, int4));
+ res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) ));
idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
- res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
+ res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) ));
idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE));
- res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int));
+ res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT));
*min_max_val = SCALAR_SELECT_OP(in.s0, in.s1);
*min_max_idx = res.s0;
}
@@ -112,15 +112,15 @@ inline DATA_TYPE_OUTPUT vectorized_compute_arg_min_max_16(DATA_TYPE *min_max_val
VEC_DATA_TYPE(COND_DATA_TYPE, 8)
idx_sel = VECTOR_PREDICATE_EQ(in.s01234567, in.s89abcdef);
in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
- res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
+ res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 8) ));
idx_sel.s0123 = VECTOR_PREDICATE(in.s0123, in.s4567) || (in.s0123 == in.s4567 && CONVERT(((res.s0123 < res.s4567)), VEC_DATA_TYPE(COND_DATA_TYPE, 4)));
in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
- res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
+ res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 4) ));
idx_sel.s01 = (VECTOR_PREDICATE(in.s01, in.s23)) || (in.s01 == in.s23 && CONVERT(((res.s01 < res.s23)), VEC_DATA_TYPE(COND_DATA_TYPE, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
- res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
+ res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 2) ));
idx_sel.s0 = VECTOR_PREDICATE(in.s0, in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), COND_DATA_TYPE));
- res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int));
+ res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, DATA_TYPE_OUTPUT));
*min_max_val = SCALAR_SELECT_OP(in.s0, in.s1);
*min_max_idx = res.s0;
}
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index 41f885e4ba..2728958add 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -45,13 +45,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::S64);
+
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
if(output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, DataType::U64);
}
return Status{};
diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
index ea6311afdb..b30d739025 100644
--- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
+++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
@@ -96,10 +96,15 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const
DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN) ? DataType::S32 : output->info()->data_type();
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
+ TensorShape not_reshaped_output_shape{ input->info()->tensor_shape() };
+ not_reshaped_output_shape.set(axis, 1);
+ auto_init_if_empty(*_not_reshaped_output.info(), input->info()->clone()->set_tensor_shape(not_reshaped_output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
+
_arg_min_max_kernel = std::make_unique<CLArgMinMaxLayerKernel>();
_arg_min_max_kernel->configure(compile_context, input, &_not_reshaped_output, axis, op);
_memory_group.manage(&_not_reshaped_output);
+
_reshape.configure(compile_context, &_not_reshaped_output, output);
_not_reshaped_output.allocator()->allocate();
}