From f20d6d6ae5a0da2c856294e93341cdc065db58f9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 16 Jul 2020 17:46:51 +0100 Subject: COMPMID-3390: Async support to CLStridedSliceLayerKernel kernels/functions Signed-off-by: Michalis Spyrou Change-Id: I9ff7e8d2fb4d36c4b7c44e885abf34ff6d4c577c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3587 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLStridedSliceKernel.cpp | 49 +++++++++++----------------- 1 file changed, 19 insertions(+), 30 deletions(-) (limited to 'src/core/CL/kernels/CLStridedSliceKernel.cpp') diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index c1f4c4d1b5..94cbd43bb1 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/bit_ops.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "support/StringSupport.h" @@ -66,7 +67,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, +std::pair validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { @@ -84,29 +85,14 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLStridedSliceKernel::CLStridedSliceKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); -} - -void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, +void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); - _input = input; - _output = output; - - const TensorShape &input_shape = input->info()->tensor_shape(); + const TensorShape &input_shape = input->tensor_shape(); Coordinates starts_abs; Coordinates ends_abs; @@ -117,12 +103,12 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co begin_mask, end_mask, shrink_axis_mask); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + auto win_config = validate_and_configure_window(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); // Enable multiple elements processing along x if stride_x is 1 and output width greater than the access vector size - const int vec_size_x = 16 / input->info()->element_size(); - const int output_width_x = output->info()->tensor_shape().x(); + const int vec_size_x = 16 / input->element_size(); + const int output_width_x = output->tensor_shape().x(); const bool is_shrink_on_x = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, 0); const bool multi_access_x = !is_shrink_on_x && (final_strides.x() == 1) && (output_width_x / vec_size_x > 0); @@ -137,7 +123,7 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Create build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type()))); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { const bool is_shrink = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, i); @@ -150,8 +136,8 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co build_opts.add_option_if_else(input_shape.num_dimensions() > 2, "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1"); - build_opts.add_option_if_else(_output->info()->num_dimensions() > 2, - "-DDST_DEPTH=" + support::cpp11::to_string(_output->info()->tensor_shape().z()), + build_opts.add_option_if_else(output->num_dimensions() > 2, + "-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()), "-DDST_DEPTH=1"); // Create kernel @@ -160,11 +146,11 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Set config_id for enabling LWS tuning _config_id = "strided_slice"; _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += lower_string(string_from_data_type(input->data_type())); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(i)); + _config_id += support::cpp11::to_string(input->dimension(i)); _config_id += "_"; _config_id += support::cpp11::to_string(starts_abs[i]); _config_id += "_"; @@ -186,19 +172,22 @@ Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLStridedSliceKernel::run(const Window &window, cl::CommandQueue &queue) +void CLStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); do { unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice); - add_4D_tensor_argument(idx, _output, slice); + add_4D_tensor_argument(idx, src, slice); + add_4D_tensor_argument(idx, dst, slice); enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_4D(slice)); -- cgit v1.2.1