diff options
Diffstat (limited to 'src/core/CL/kernels/CLGatherKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGatherKernel.cpp | 48 |
1 files changed, 31 insertions, 17 deletions
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index cbd540d80b..904bb07282 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,8 +22,10 @@ * SOFTWARE. */ #include "src/core/CL/kernels/CLGatherKernel.h" + #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -34,20 +36,22 @@ namespace arm_compute { namespace { -inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +inline Status +validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() + indices->num_dimensions() - 1) > 4); + ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape( + input->tensor_shape(), indices->tensor_shape(), actual_axis); ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); } @@ -56,12 +60,14 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in return Status{}; } -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis) +std::pair<Status, Window> +validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions())); // Output auto initialization if not yet initialized - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape( + input->tensor_shape(), indices->tensor_shape(), actual_axis); auto_init_if_empty((*output), output_shape, 1, input->data_type()); // Create window @@ -72,9 +78,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } // namespace -CLGatherKernel::CLGatherKernel() - : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) +CLGatherKernel::CLGatherKernel() : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) { + _type = CLKernelType::ELEMENTWISE; } void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) @@ -82,10 +88,14 @@ void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis); } -void CLGatherKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) +void CLGatherKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *indices, + ICLTensor *output, + int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - auto padding_info = get_padding_info({ input, output, indices }); + auto padding_info = get_padding_info({input, output, indices}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis)); // Configure kernel window @@ -99,10 +109,12 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); + build_opts.add_option("-DINDICES_DIMS=" + support::cpp11::to_string(indices->info()->num_dimensions())); build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis)); + build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis])); // Create kernel _kernel = create_kernel(compile_context, "gather", build_opts.options()); @@ -110,10 +122,12 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +Status +CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first); return Status{}; } @@ -125,7 +139,7 @@ void CLGatherKernel::run(const Window &window, cl::CommandQueue &queue) Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); unsigned int idx = 0; add_4D_tensor_argument(idx, _input, window_collapsed); - add_1D_tensor_argument(idx, _indices, window_collapsed); + add_4D_tensor_argument(idx, _indices, window_collapsed); add_4D_tensor_argument(idx, _output, window_collapsed); enqueue(queue, *this, window_collapsed, lws_hint()); } |