From f738fe6b6e059916294c48b942952c261569df18 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 15 Jul 2020 18:10:17 +0100 Subject: COMPMID-3389: Async support to CLElementwiseUnaryLayerKernel kernels/functions Signed-off-by: Michalis Spyrou Change-Id: I2ce75a4705cfd75e30aefa0a2ea31e751b975469 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3579 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 33 +- .../CL/functions/CLElementWiseUnaryLayer.cpp | 355 +++++++++++++++++++-- 2 files changed, 344 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index 78ab813f67..87fafd340c 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -26,10 +26,11 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "support/StringSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output) @@ -50,26 +51,22 @@ Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output) } } // namespace -void CLElementWiseUnaryLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op) +void CLElementWiseUnaryLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op) { configure(CLKernelLibrary::get().get_compile_context(), input, output, op); } -void CLElementWiseUnaryLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op) +void CLElementWiseUnaryLayerKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info())); - - // Configure kernel window - _input = input; - _output = output; + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input, *output)); const std::string kernel_name = "elementwise_unary"; - const int vec_size_x = 16 / output->info()->element_size(); - const int output_width_x = output->info()->tensor_shape().x(); + const int vec_size_x = 16 / output->element_size(); + const int output_width_x = output->tensor_shape().x(); const bool multi_access_x = (output_width_x / vec_size_x > 0); - Window win = calculate_max_window(*output->info()); + Window win = calculate_max_window(*output); if(multi_access_x) { win.set(Window::DimX, @@ -79,7 +76,7 @@ void CLElementWiseUnaryLayerKernel::configure(const CLCompileContext &compile_co // Set kernel build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); switch(op) @@ -122,7 +119,7 @@ Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const I return Status{}; } -void CLElementWiseUnaryLayerKernel::run(const Window &window, cl::CommandQueue &queue) +void CLElementWiseUnaryLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -130,12 +127,16 @@ void CLElementWiseUnaryLayerKernel::run(const Window &window, cl::CommandQueue & Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + do { unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); + add_3D_tensor_argument(idx, src, slice); + add_3D_tensor_argument(idx, dst, slice); enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp index 1a5bdeb252..402b9648a7 100644 --- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp +++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp @@ -30,6 +30,144 @@ namespace arm_compute { +namespace experimental +{ +void CLRsqrtLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::RSQRT); + _kernel = std::move(k); +} + +Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::RSQRT); +} + +MemoryRequirements CLRsqrtLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLExpLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::EXP); + _kernel = std::move(k); +} + +Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::EXP); +} + +MemoryRequirements CLExpLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLNegLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::NEG); + _kernel = std::move(k); +} + +Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::NEG); +} + +MemoryRequirements CLNegLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLSinLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::SIN); + _kernel = std::move(k); +} + +Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::SIN); +} + +MemoryRequirements CLSinLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLAbsLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::ABS); + _kernel = std::move(k); +} + +Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::ABS); +} + +MemoryRequirements CLAbsLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLLogLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::LOG); + _kernel = std::move(k); +} + +Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::LOG); +} + +MemoryRequirements CLLogLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void CLRoundLayer::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, ElementWiseUnary::ROUND); + _kernel = std::move(k); +} + +Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + return arm_compute::CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::ROUND); +} + +MemoryRequirements CLRoundLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLRsqrtLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLRsqrtLayer::CLRsqrtLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +CLRsqrtLayer::CLRsqrtLayer(CLRsqrtLayer &&) = default; +CLRsqrtLayer &CLRsqrtLayer::operator=(CLRsqrtLayer &&) = default; +CLRsqrtLayer::~CLRsqrtLayer() = default; + void CLRsqrtLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -37,15 +175,41 @@ void CLRsqrtLayer::configure(const ICLTensor *input, ICLTensor *output) void CLRsqrtLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::RSQRT); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } + Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::RSQRT); + return experimental::CLRsqrtLayer::validate(input, output); } +void CLRsqrtLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLExpLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLExpLayer::CLExpLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +CLExpLayer::CLExpLayer(CLExpLayer &&) = default; +CLExpLayer &CLExpLayer::operator=(CLExpLayer &&) = default; +CLExpLayer::~CLExpLayer() = default; + void CLExpLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -53,15 +217,41 @@ void CLExpLayer::configure(const ICLTensor *input, ICLTensor *output) void CLExpLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::EXP); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } + Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::EXP); + return experimental::CLExpLayer::validate(input, output); } +void CLExpLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLNegLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLNegLayer::CLNegLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +CLNegLayer::CLNegLayer(CLNegLayer &&) = default; +CLNegLayer &CLNegLayer::operator=(CLNegLayer &&) = default; +CLNegLayer::~CLNegLayer() = default; + void CLNegLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -69,15 +259,40 @@ void CLNegLayer::configure(const ICLTensor *input, ICLTensor *output) void CLNegLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::NEG); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::NEG); + return experimental::CLNegLayer::validate(input, output); +} + +void CLNegLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } +struct CLSinLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLSinLayer::CLSinLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +CLSinLayer::CLSinLayer(CLSinLayer &&) = default; +CLSinLayer &CLSinLayer::operator=(CLSinLayer &&) = default; +CLSinLayer::~CLSinLayer() = default; + void CLSinLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -85,15 +300,40 @@ void CLSinLayer::configure(const ICLTensor *input, ICLTensor *output) void CLSinLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::SIN); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::SIN); + return experimental::CLSinLayer::validate(input, output); +} + +void CLSinLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLAbsLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLAbsLayer::CLAbsLayer() + : _impl(support::cpp14::make_unique()) +{ } +CLAbsLayer::CLAbsLayer(CLAbsLayer &&) = default; +CLAbsLayer &CLAbsLayer::operator=(CLAbsLayer &&) = default; +CLAbsLayer::~CLAbsLayer() = default; + void CLAbsLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -101,14 +341,40 @@ void CLAbsLayer::configure(const ICLTensor *input, ICLTensor *output) void CLAbsLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::ABS); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::ABS); + return experimental::CLAbsLayer::validate(input, output); +} + +void CLAbsLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} + +struct CLLogLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLLogLayer::CLLogLayer() + : _impl(support::cpp14::make_unique()) +{ } + +CLLogLayer::CLLogLayer(CLLogLayer &&) = default; +CLLogLayer &CLLogLayer::operator=(CLLogLayer &&) = default; +CLLogLayer::~CLLogLayer() = default; + void CLLogLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -116,15 +382,40 @@ void CLLogLayer::configure(const ICLTensor *input, ICLTensor *output) void CLLogLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::LOG); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::LOG); + return experimental::CLLogLayer::validate(input, output); +} + +void CLLogLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } +struct CLRoundLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLRoundLayer::CLRoundLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +CLRoundLayer::CLRoundLayer(CLRoundLayer &&) = default; +CLRoundLayer &CLRoundLayer::operator=(CLRoundLayer &&) = default; +CLRoundLayer::~CLRoundLayer() = default; + void CLRoundLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -132,13 +423,21 @@ void CLRoundLayer::configure(const ICLTensor *input, ICLTensor *output) void CLRoundLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, ElementWiseUnary::ROUND); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, input->info(), output->info()); } Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::ROUND); + return experimental::CLRoundLayer::validate(input, output); } +void CLRoundLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); +} } // namespace arm_compute -- cgit v1.2.1