From ab23dd0fbc632063235a6ad408241dc79a35d3e4 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 6 Jul 2020 14:57:36 +0100 Subject: COMPMID-3387: Support memory injection in CLActivationLayer Signed-off-by: Georgios Pinitas Change-Id: I31f9620607b372fc3340c71e748a5ea177d9da62 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3520 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/kernels/CLActivationLayerKernel.cpp | 43 ++++++++---------- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 5 ++- src/runtime/CL/functions/CLActivationLayer.cpp | 58 ++++++++++++++++++++++--- src/runtime/CL/functions/CLLSTMLayer.cpp | 32 +++++++------- src/runtime/CL/functions/CLRNNLayer.cpp | 8 ++-- 5 files changed, 95 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d40e9a15be..e030177549 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -34,6 +34,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/float_ops.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "support/StringSupport.h" #include @@ -116,16 +117,11 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLActivationLayerKernel::CLActivationLayerKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) + : _run_in_place(false) { } -void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info); -} - -void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) +void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -134,14 +130,13 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, if(output != nullptr) { // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), - *input->info()->clone()); + auto_init_if_empty(*output, *input->clone()); } - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, act_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, (output != nullptr) ? output : nullptr, act_info)); - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const DataType dt = input->info()->data_type(); + const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); + const DataType dt = input->data_type(); float a_const = act_info.a(); float b_const = act_info.b(); @@ -163,7 +158,7 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Set quantization info build options if(is_quantized) { - const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); if(!perform_activation_in_float) { @@ -214,7 +209,7 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Set scale and offset of the input and output if they have different quantization info if(output != nullptr) { - const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); if(iq_info != oq_info) { @@ -233,12 +228,8 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); - // Make sure _kernel is initialized before calling the parent's configure - _input = input; - _output = output; - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); + auto win_config = validate_and_configure_window(input, (_run_in_place) ? nullptr : output); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure_internal(win_config.second); @@ -246,9 +237,9 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, _config_id = "activation_layer_"; _config_id += lower_string(string_from_data_type(dt)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); + _config_id += support::cpp11::to_string(input->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); + _config_id += support::cpp11::to_string(input->dimension(1)); } Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) @@ -260,21 +251,25 @@ Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) +void CLActivationLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + ARM_COMPUTE_ERROR_ON(_run_in_place && src != dst); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); do { unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, src, slice); if(!_run_in_place) { - add_3D_tensor_argument(idx, _output, slice); + add_3D_tensor_argument(idx, dst, slice); } enqueue(queue, *this, slice, lws_hint()); } diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index 97fde8645e..d486b06c8e 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -34,6 +34,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Cast.h" #include @@ -107,8 +108,8 @@ void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTens Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_3D(); - const auto src = dynamic_cast(inputs.at(TensorType::ACL_SRC)); - auto dst = dynamic_cast(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); // Set inputs unsigned int idx = 0; diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 989603a9df..d6b80200cd 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLRuntimeContext.h" @@ -30,11 +31,46 @@ namespace arm_compute { +namespace experimental +{ +void CLActivationLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, act_info); + _kernel = std::move(k); +} + +Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return CLActivationLayerKernel::validate(input, output, act_info); +} + +MemoryRequirements CLActivationLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLActivationLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + CLRuntimeContext *ctx{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx) - : ICLSimpleFunction(ctx) + : _impl(support::cpp14::make_unique()) { + _impl->ctx = ctx; } +CLActivationLayer::CLActivationLayer(CLActivationLayer &&) = default; + +CLActivationLayer &CLActivationLayer::operator=(CLActivationLayer &&) = default; + +CLActivationLayer::~CLActivationLayer() = default; + void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info); @@ -42,13 +78,25 @@ void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, Activatio void CLActivationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, act_info); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _impl->src = input; + _impl->dst = output == nullptr ? input : output; + + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), act_info); } Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { - return CLActivationLayerKernel::validate(input, output, act_info); + return experimental::CLActivationLayer::validate(input, output, act_info); +} + +void CLActivationLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 56f22e2fe0..e63a9cceb0 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -499,7 +499,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate input gate if(!lstm_params.has_cifg_opt()) @@ -534,7 +534,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(), &input_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); } else { @@ -552,14 +552,14 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE)); if(cell_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, - cell_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, + cell_threshold))); } std::vector in_out_weights; @@ -584,18 +584,18 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate output state - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); if(lstm_params.has_projection()) { ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out)); if(projection_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(output_state_out, output_state_out, - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output_state_out, output_state_out, + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); } } @@ -638,7 +638,7 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_forget_gate_coeff); CLScheduler::get().enqueue(_accum_forget_gate_bias); } - CLScheduler::get().enqueue(_activation_forget_gate); + _activation_forget_gate.run(); if(_run_cifg_opt) { @@ -661,7 +661,7 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_input_gate_coeff); CLScheduler::get().enqueue(_accum_input_gate_bias); } - CLScheduler::get().enqueue(_activation_input_gate); + _activation_input_gate.run(); } _fully_connected_cell_state.run(); @@ -674,14 +674,14 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_cell_gate_coeff); CLScheduler::get().enqueue(_accum_cell_gate_bias); } - CLScheduler::get().enqueue(_activation_cell_state); + _activation_cell_state.run(); CLScheduler::get().enqueue(_pixelwise_mul_cell_state1); CLScheduler::get().enqueue(_pixelwise_mul_cell_state2); CLScheduler::get().enqueue(_accum_cell_state2); if(_perform_cell_clipping) { - CLScheduler::get().enqueue(_cell_clip); + _cell_clip.run(); } _fully_connected_output.run(); @@ -697,9 +697,9 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_output_gate_coeff); CLScheduler::get().enqueue(_accum_output_gate_bias); } - CLScheduler::get().enqueue(_activation_output); + _activation_output.run(); - CLScheduler::get().enqueue(_activation_output_state); + _activation_output_state.run(); CLScheduler::get().enqueue(_pixelwise_mul_output_state2); if(_has_projection_weights) @@ -707,7 +707,7 @@ void CLLSTMLayer::run() _fully_connected_output_state.run(); if(_perform_projection_clipping) { - CLScheduler::get().enqueue(_projection_clip); + _projection_clip.run(); } } diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp index 57b8d70089..075f4a4ebd 100644 --- a/src/runtime/CL/functions/CLRNNLayer.cpp +++ b/src/runtime/CL/functions/CLRNNLayer.cpp @@ -35,7 +35,7 @@ using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; CLRNNLayer::CLRNNLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), + : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _is_prepared(false) { } @@ -60,7 +60,7 @@ Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f)); ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&shape_info, &shape_info, info)); return Status{}; } @@ -101,7 +101,7 @@ void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTen _fully_connected_out.allocator()->allocate(); _gemm_output.allocator()->allocate(); - _activation_kernel.configure(compile_context, &_add_output, hidden_state, info); + _activation.configure(compile_context, &_add_output, hidden_state, info); _add_output.allocator()->allocate(); _copy_kernel.configure(compile_context, hidden_state, output); @@ -116,7 +116,7 @@ void CLRNNLayer::run() _fully_connected_kernel.run(); _gemm_state_f.run(); CLScheduler::get().enqueue(_add_kernel); - CLScheduler::get().enqueue(_activation_kernel); + _activation.run(); // copy hidden out to output CLScheduler::get().enqueue(_copy_kernel); -- cgit v1.2.1