From f20d6d6ae5a0da2c856294e93341cdc065db58f9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 16 Jul 2020 17:46:51 +0100 Subject: COMPMID-3390: Async support to CLStridedSliceLayerKernel kernels/functions Signed-off-by: Michalis Spyrou Change-Id: I9ff7e8d2fb4d36c4b7c44e885abf34ff6d4c577c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3587 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLStridedSliceKernel.cpp | 49 +++++++++------------- src/runtime/CL/functions/CLSlice.cpp | 53 ++++++++++++++++++++--- src/runtime/CL/functions/CLStridedSlice.cpp | 63 ++++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index c1f4c4d1b5..94cbd43bb1 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/bit_ops.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "support/StringSupport.h" @@ -66,7 +67,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, +std::pair validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { @@ -84,29 +85,14 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLStridedSliceKernel::CLStridedSliceKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); -} - -void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, +void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); - _input = input; - _output = output; - - const TensorShape &input_shape = input->info()->tensor_shape(); + const TensorShape &input_shape = input->tensor_shape(); Coordinates starts_abs; Coordinates ends_abs; @@ -117,12 +103,12 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co begin_mask, end_mask, shrink_axis_mask); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + auto win_config = validate_and_configure_window(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); // Enable multiple elements processing along x if stride_x is 1 and output width greater than the access vector size - const int vec_size_x = 16 / input->info()->element_size(); - const int output_width_x = output->info()->tensor_shape().x(); + const int vec_size_x = 16 / input->element_size(); + const int output_width_x = output->tensor_shape().x(); const bool is_shrink_on_x = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, 0); const bool multi_access_x = !is_shrink_on_x && (final_strides.x() == 1) && (output_width_x / vec_size_x > 0); @@ -137,7 +123,7 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Create build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type()))); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { const bool is_shrink = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, i); @@ -150,8 +136,8 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co build_opts.add_option_if_else(input_shape.num_dimensions() > 2, "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1"); - build_opts.add_option_if_else(_output->info()->num_dimensions() > 2, - "-DDST_DEPTH=" + support::cpp11::to_string(_output->info()->tensor_shape().z()), + build_opts.add_option_if_else(output->num_dimensions() > 2, + "-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()), "-DDST_DEPTH=1"); // Create kernel @@ -160,11 +146,11 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Set config_id for enabling LWS tuning _config_id = "strided_slice"; _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += lower_string(string_from_data_type(input->data_type())); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(i)); + _config_id += support::cpp11::to_string(input->dimension(i)); _config_id += "_"; _config_id += support::cpp11::to_string(starts_abs[i]); _config_id += "_"; @@ -186,19 +172,22 @@ Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLStridedSliceKernel::run(const Window &window, cl::CommandQueue &queue) +void CLStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); do { unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice); - add_4D_tensor_argument(idx, _output, slice); + add_4D_tensor_argument(idx, src, slice); + add_4D_tensor_argument(idx, dst, slice); enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_4D(slice)); diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index 129d97afeb..b60daeee44 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -31,12 +31,9 @@ namespace arm_compute { -void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +namespace experimental { - configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends); -} - -void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +void CLSlice::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -63,4 +60,50 @@ Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, co return CLStridedSliceKernel::validate(input, output, starts, ends, BiStrides(), 0, slice_end_mask, 0); } + +MemoryRequirements CLSlice::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLSlice::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLSlice::CLSlice() + : _impl(support::cpp14::make_unique()) +{ +} +CLSlice::CLSlice(CLSlice &&) = default; +CLSlice &CLSlice::operator=(CLSlice &&) = default; +CLSlice::~CLSlice() = default; + +Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends) +{ + return experimental::CLSlice::validate(input, output, starts, ends); +} + +void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends); +} + +void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +{ + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info(), starts, ends); +} + +void CLSlice::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index fc011ceaf7..d1b16700ff 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -23,12 +23,55 @@ */ #include "arm_compute/runtime/CL/functions/CLStridedSlice.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "support/MemorySupport.h" namespace arm_compute { +namespace experimental +{ +void CLStridedSlice::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + _kernel = std::move(k); +} + +Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +{ + return CLStridedSliceKernel::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); +} + +MemoryRequirements CLStridedSlice::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLStridedSlice::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + CLRuntimeContext *ctx{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLStridedSlice::CLStridedSlice(CLRuntimeContext *ctx) + : _impl(support::cpp14::make_unique()) +{ + _impl->ctx = ctx; +} + +CLStridedSlice::CLStridedSlice(CLStridedSlice &&) = default; +CLStridedSlice &CLStridedSlice::operator=(CLStridedSlice &&) = default; +CLStridedSlice::~CLStridedSlice() = default; + void CLStridedSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) @@ -40,15 +83,27 @@ void CLStridedSlice::configure(const CLCompileContext &compile_context, const IC const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _impl->src = input; + _impl->dst = output; + + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); } Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { - return CLStridedSliceKernel::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return experimental::CLStridedSlice::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); +} + +void CLStridedSlice::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } } // namespace arm_compute -- cgit v1.2.1