From f20d6d6ae5a0da2c856294e93341cdc065db58f9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 16 Jul 2020 17:46:51 +0100 Subject: COMPMID-3390: Async support to CLStridedSliceLayerKernel kernels/functions Signed-off-by: Michalis Spyrou Change-Id: I9ff7e8d2fb4d36c4b7c44e885abf34ff6d4c577c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3587 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 45 ++----------- arm_compute/runtime/CL/functions/CLSlice.h | 66 ++++++++++++++++++- arm_compute/runtime/CL/functions/CLStridedSlice.h | 76 +++++++++++++++++++++- src/core/CL/kernels/CLStridedSliceKernel.cpp | 49 ++++++-------- src/runtime/CL/functions/CLSlice.cpp | 53 +++++++++++++-- src/runtime/CL/functions/CLStridedSlice.cpp | 63 ++++++++++++++++-- 6 files changed, 269 insertions(+), 83 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h index ff3b0697a5..28a665b113 100644 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h @@ -31,49 +31,17 @@ namespace arm_compute { -// Forward declarations -class ICLTensor; - /** Interface for the kernel to perform tensor strided slicing */ class CLStridedSliceKernel : public ICLKernel { public: - /** Default constructor */ - CLStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel(const CLStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel &operator=(const CLStridedSliceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLStridedSliceKernel(CLStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLStridedSliceKernel &operator=(CLStridedSliceKernel &&) = 
default; - /** Default destructor */ - ~CLStridedSliceKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - void configure(const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); /** Configure kernel * * @note Supported tensor rank: up to 4 * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. 
The length must be of rank(input). * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -82,7 +50,7 @@ public: * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); @@ -105,11 +73,8 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h index 9f9591e4de..6fe62acaf5 100644 --- a/arm_compute/runtime/CL/functions/CLSlice.h +++ b/arm_compute/runtime/CL/functions/CLSlice.h @@ -24,17 +24,72 @@ #ifndef ARM_COMPUTE_CL_SLICE_H #define ARM_COMPUTE_CL_SLICE_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/CL/ICLOperator.h" +#include "arm_compute/runtime/IFunction.h" namespace arm_compute { // Forward Declarations class ICLTensor; +namespace experimental +{ /** Basic function to perform tensor slicing */ -class CLSlice : public ICLSimpleFunction +class CLSlice : public ICLOperator { public: + /** Configure kernel + * + * 
@note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + /** Static function to check if given info will lead to a valid configuration of @ref CLSlice + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). 
+ * + * @return A status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; +}; +} // namespace experimental + +/** Basic function to perform tensor slicing */ +class CLSlice : public IFunction +{ +public: + /** Default Constructor */ + CLSlice(); + /** Default Destructor */ + ~CLSlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSlice(const CLSlice &) = delete; + /** Default move constructor */ + CLSlice(CLSlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSlice &operator=(const CLSlice &) = delete; + /** Default move assignment operator */ + CLSlice &operator=(CLSlice &&); /** Configure kernel * * @note Supported tensor rank: up to 4 @@ -78,6 +133,13 @@ public: * @return A status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CL_SLICE_H */ diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h index 98a3bd49d3..394d8c4f59 100644 --- a/arm_compute/runtime/CL/functions/CLStridedSlice.h +++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h @@ -24,7 +24,9 @@ #ifndef ARM_COMPUTE_CL_STRIDED_SLICE_H #define ARM_COMPUTE_CL_STRIDED_SLICE_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/CL/CLRuntimeContext.h" +#include "arm_compute/runtime/CL/ICLOperator.h" +#include "arm_compute/runtime/IFunction.h" namespace arm_compute { @@ -32,9 +34,24 @@ namespace arm_compute class ICLTensor; /** Basic function to run @ref CLStridedSliceKernel */ -class
CLStridedSlice : public ICLSimpleFunction +class CLStridedSlice : public IFunction { public: + /** Constructor + * + * @param[in] ctx Runtime context to be used by the function + */ + CLStridedSlice(CLRuntimeContext *ctx = nullptr); + /** Destructor */ + ~CLStridedSlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStridedSlice(const CLStridedSlice &) = delete; + /** Default move constructor */ + CLStridedSlice(CLStridedSlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStridedSlice &operator=(const CLStridedSlice &) = delete; + /** Default move assignment operator */ + CLStridedSlice &operator=(CLStridedSlice &&); /** Configure kernel * * @note Supported tensor rank: up to 4 @@ -88,6 +105,61 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +namespace experimental +{ +/** Basic function to run @ref CLStridedSliceKernel */ +class CLStridedSlice : public ICLOperator +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSlice + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All. + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. 
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CL_STRIDED_SLICE_H */ diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index c1f4c4d1b5..94cbd43bb1 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/bit_ops.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "support/StringSupport.h" @@ -66,7 +67,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, return Status{}; } -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, +std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { @@ -84,29 +85,14 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLStridedSliceKernel::CLStridedSliceKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output,
starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); -} - -void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, +void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); - _input = input; - _output = output; - - const TensorShape &input_shape = input->info()->tensor_shape(); + const TensorShape &input_shape = input->tensor_shape(); Coordinates starts_abs; Coordinates ends_abs; @@ -117,12 +103,12 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co begin_mask, end_mask, shrink_axis_mask); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + auto win_config = validate_and_configure_window(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); // Enable multiple elements processing along x if stride_x is 1 and output width greater than the access vector size - const int vec_size_x = 16 / input->info()->element_size(); - const int output_width_x = output->info()->tensor_shape().x(); + const int vec_size_x = 16 / input->element_size(); + const int output_width_x = output->tensor_shape().x(); const bool is_shrink_on_x = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, 0); const bool multi_access_x = !is_shrink_on_x && (final_strides.x() == 1) && 
(output_width_x / vec_size_x > 0); @@ -137,7 +123,7 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Create build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type()))); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { const bool is_shrink = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, i); @@ -150,8 +136,8 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co build_opts.add_option_if_else(input_shape.num_dimensions() > 2, "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1"); - build_opts.add_option_if_else(_output->info()->num_dimensions() > 2, - "-DDST_DEPTH=" + support::cpp11::to_string(_output->info()->tensor_shape().z()), + build_opts.add_option_if_else(output->num_dimensions() > 2, + "-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()), "-DDST_DEPTH=1"); // Create kernel @@ -160,11 +146,11 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co // Set config_id for enabling LWS tuning _config_id = "strided_slice"; _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += lower_string(string_from_data_type(input->data_type())); for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(i)); + _config_id += support::cpp11::to_string(input->dimension(i)); _config_id += "_"; _config_id += support::cpp11::to_string(starts_abs[i]); _config_id += "_"; @@ -186,19 +172,22 @@ Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLStridedSliceKernel::run(const Window &window, 
cl::CommandQueue &queue) +void CLStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); do { unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice); - add_4D_tensor_argument(idx, _output, slice); + add_4D_tensor_argument(idx, src, slice); + add_4D_tensor_argument(idx, dst, slice); enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_4D(slice)); diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index 129d97afeb..b60daeee44 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -31,12 +31,9 @@ namespace arm_compute { -void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +namespace experimental { - configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends); -} - -void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +void CLSlice::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -63,4 +60,50 @@ Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, co return CLStridedSliceKernel::validate(input, output, starts, ends, BiStrides(), 0, slice_end_mask, 0); } + +MemoryRequirements
CLSlice::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLSlice::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr<experimental::CLSlice> op{ nullptr }; +}; + +CLSlice::CLSlice() + : _impl(support::cpp14::make_unique<Impl>()) +{ +} +CLSlice::CLSlice(CLSlice &&) = default; +CLSlice &CLSlice::operator=(CLSlice &&) = default; +CLSlice::~CLSlice() = default; + +Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends) +{ + return experimental::CLSlice::validate(input, output, starts, ends); +} + +void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends); +} + +void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends) +{ + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique<experimental::CLSlice>(); + _impl->op->configure(compile_context, input->info(), output->info(), starts, ends); +} + +void CLSlice::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + _impl->op->run(src, dst, {}); +} } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index fc011ceaf7..d1b16700ff 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -23,12 +23,55 @@ */ #include "arm_compute/runtime/CL/functions/CLStridedSlice.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "support/MemorySupport.h" namespace arm_compute { +namespace experimental +{ +void
CLStridedSlice::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +{ + auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>(); + k->configure(compile_context, input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + _kernel = std::move(k); +} + +Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +{ + return CLStridedSliceKernel::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); +} + +MemoryRequirements CLStridedSlice::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLStridedSlice::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + CLRuntimeContext *ctx{ nullptr }; + std::unique_ptr<experimental::CLStridedSlice> op{ nullptr }; +}; + +CLStridedSlice::CLStridedSlice(CLRuntimeContext *ctx) + : _impl(support::cpp14::make_unique<Impl>()) +{ + _impl->ctx = ctx; +} + +CLStridedSlice::CLStridedSlice(CLStridedSlice &&) = default; +CLStridedSlice &CLStridedSlice::operator=(CLStridedSlice &&) = default; +CLStridedSlice::~CLStridedSlice() = default; + void CLStridedSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) @@ -40,15 +83,27 @@ void CLStridedSlice::configure(const CLCompileContext &compile_context, const IC const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { - auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>(); - k->configure(compile_context, input, output, starts, ends,
strides, begin_mask, end_mask, shrink_axis_mask); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _impl->src = input; + _impl->dst = output; + + _impl->op = arm_compute::support::cpp14::make_unique<experimental::CLStridedSlice>(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); } Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { - return CLStridedSliceKernel::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return experimental::CLStridedSlice::validate(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); +} + +void CLStridedSlice::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } } // namespace arm_compute -- cgit v1.2.1