From ab23dd0fbc632063235a6ad408241dc79a35d3e4 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 6 Jul 2020 14:57:36 +0100
Subject: COMPMID-3387: Support memory injection in CLActivationLayer

Signed-off-by: Georgios Pinitas
Change-Id: I31f9620607b372fc3340c71e748a5ea177d9da62
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3520
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 src/runtime/CL/functions/CLActivationLayer.cpp | 58 +++++++++++++++++++++++---
 src/runtime/CL/functions/CLLSTMLayer.cpp       | 32 +++++++-------
 src/runtime/CL/functions/CLRNNLayer.cpp        |  8 ++--
 3 files changed, 73 insertions(+), 25 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index 989603a9df..d6b80200cd 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 
+#include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLRuntimeContext.h"
@@ -30,11 +31,46 @@
 
 namespace arm_compute
 {
+namespace experimental
+{
+void CLActivationLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info)
+{
+    auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>();
+    k->configure(compile_context, input, output, act_info);
+    _kernel = std::move(k);
+}
+
+Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+    return CLActivationLayerKernel::validate(input, output, act_info);
+}
+
+MemoryRequirements CLActivationLayer::workspace() const
+{
+    return MemoryRequirements{};
+}
+} // namespace experimental
+
+struct CLActivationLayer::Impl
+{
+    const ICLTensor                                  *src{ nullptr };
+    ICLTensor                                        *dst{ nullptr };
+    CLRuntimeContext                                 *ctx{ nullptr };
+    std::unique_ptr<experimental::CLActivationLayer>  op{ nullptr };
+};
+
 CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx)
-    : ICLSimpleFunction(ctx)
+    : _impl(support::cpp14::make_unique<Impl>())
 {
+    _impl->ctx = ctx;
 }
 
+CLActivationLayer::CLActivationLayer(CLActivationLayer &&) = default;
+
+CLActivationLayer &CLActivationLayer::operator=(CLActivationLayer &&) = default;
+
+CLActivationLayer::~CLActivationLayer() = default;
+
 void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info);
@@ -42,13 +78,25 @@ void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, Activatio
 
 void CLActivationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
 {
-    auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>();
-    k->configure(compile_context, input, output, act_info);
-    _kernel = std::move(k);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+    _impl->src = input;
+    _impl->dst = output == nullptr ? input : output;
+
+    _impl->op = arm_compute::support::cpp14::make_unique<experimental::CLActivationLayer>();
+    _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), act_info);
 }
 
 Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLActivationLayerKernel::validate(input, output, act_info);
+    return experimental::CLActivationLayer::validate(input, output, act_info);
+}
+
+void CLActivationLayer::run()
+{
+    const InputTensorMap  src{ { TensorType::ACL_SRC, _impl->src } };
+    const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } };
+
+    _impl->op->run(src, dst, {});
 }
 } // namespace arm_compute
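
For context, here is a minimal usage sketch of the new experimental operator (not part of this change). It assumes the experimental::CLActivationLayer class is declared next to the function in CLActivationLayer.h, as the implementation above suggests; the include paths, tensor shapes and the helper function name are illustrative only. The point is that configure() only sees tensor metadata, while the backing memory is injected at run() time through the tensor maps, which is what "memory injection" refers to in the subject line:

    // Illustrative sketch only; names and shapes are assumptions, not part of the patch.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

    using namespace arm_compute;

    void run_injected_activation()
    {
        CLScheduler::get().default_init();

        // Configure on metadata only; the operator captures no tensor memory here.
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        experimental::CLActivationLayer act_op;
        act_op.configure(CLKernelLibrary::get().get_compile_context(), src.info(), dst.info(),
                         ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Inject the backing tensors at run time through the tensor maps,
        // mirroring what CLActivationLayer::run() does in the hunk above.
        const InputTensorMap  inputs{ { TensorType::ACL_SRC, &src } };
        const OutputTensorMap outputs{ { TensorType::ACL_DST, &dst } };
        act_op.run(inputs, outputs, {});
    }
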
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 56f22e2fe0..e63a9cceb0 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -499,7 +499,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
                                                                    RoundingPolicy::TO_NEAREST_EVEN));
         ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE));
     }
-    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
 
     // Validate input gate
     if(!lstm_params.has_cifg_opt())
@@ -534,7 +534,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
             ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
             ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(), &input_gate, ConvertPolicy::SATURATE));
         }
-        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
     }
     else
     {
@@ -552,14 +552,14 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
                                                                    RoundingPolicy::TO_NEAREST_EVEN));
         ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE));
     }
-    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, activation_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, activation_info));
     ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
     ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
     ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
     if(cell_threshold != 0.f)
     {
-        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold,
-                                                                                                                    cell_threshold)));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold,
+                                                                                                              cell_threshold)));
     }
 
     std::vector<const ITensorInfo *> in_out_weights;
@@ -584,18 +584,18 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
                                                                    RoundingPolicy::TO_NEAREST_EVEN));
         ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE));
     }
-    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
 
     // Validate output state
-    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
     ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
     if(lstm_params.has_projection())
     {
         ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out));
         if(projection_threshold != 0.f)
         {
-            ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(output_state_out, output_state_out,
-                                                                          ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)));
+            ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output_state_out, output_state_out,
+                                                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)));
         }
     }
 
@@ -638,7 +638,7 @@ void CLLSTMLayer::run()
         CLScheduler::get().enqueue(_pixelwise_mul_forget_gate_coeff);
         CLScheduler::get().enqueue(_accum_forget_gate_bias);
     }
-    CLScheduler::get().enqueue(_activation_forget_gate);
+    _activation_forget_gate.run();
 
     if(_run_cifg_opt)
     {
@@ -661,7 +661,7 @@ void CLLSTMLayer::run()
             CLScheduler::get().enqueue(_pixelwise_mul_input_gate_coeff);
             CLScheduler::get().enqueue(_accum_input_gate_bias);
         }
-        CLScheduler::get().enqueue(_activation_input_gate);
+        _activation_input_gate.run();
     }
 
     _fully_connected_cell_state.run();
@@ -674,14 +674,14 @@ void CLLSTMLayer::run()
         CLScheduler::get().enqueue(_pixelwise_mul_cell_gate_coeff);
         CLScheduler::get().enqueue(_accum_cell_gate_bias);
     }
-    CLScheduler::get().enqueue(_activation_cell_state);
+    _activation_cell_state.run();
     CLScheduler::get().enqueue(_pixelwise_mul_cell_state1);
     CLScheduler::get().enqueue(_pixelwise_mul_cell_state2);
     CLScheduler::get().enqueue(_accum_cell_state2);
     if(_perform_cell_clipping)
     {
-        CLScheduler::get().enqueue(_cell_clip);
+        _cell_clip.run();
     }
 
     _fully_connected_output.run();
@@ -697,9 +697,9 @@ void CLLSTMLayer::run()
         CLScheduler::get().enqueue(_pixelwise_mul_output_gate_coeff);
         CLScheduler::get().enqueue(_accum_output_gate_bias);
     }
-    CLScheduler::get().enqueue(_activation_output);
+    _activation_output.run();
 
-    CLScheduler::get().enqueue(_activation_output_state);
+    _activation_output_state.run();
     CLScheduler::get().enqueue(_pixelwise_mul_output_state2);
 
     if(_has_projection_weights)
@@ -707,7 +707,7 @@ void CLLSTMLayer::run()
         _fully_connected_output_state.run();
         if(_perform_projection_clipping)
         {
-            CLScheduler::get().enqueue(_projection_clip);
+            _projection_clip.run();
         }
     }
 
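
For context, the switch in CLLSTMLayer::run() from CLScheduler::get().enqueue(member) to member.run() implies that the activation and clipping members are now CLActivationLayer functions rather than CLActivationLayerKernel kernels. The matching CLLSTMLayer.h change is not included in this diff, so the declarations below are only an assumed sketch of what the header would now contain:

    // Hypothetical excerpt of CLLSTMLayer's private members after this patch;
    // the real header is not part of the diff shown here.
    CLActivationLayer _activation_input_gate;
    CLActivationLayer _activation_forget_gate;
    CLActivationLayer _activation_cell_state;
    CLActivationLayer _cell_clip;
    CLActivationLayer _activation_output;
    CLActivationLayer _activation_output_state;
    CLActivationLayer _projection_clip;

Each of these functions captures its tensors at configure() time and builds the tensor maps internally when run() is called, which is why the calling code above stays a one-liner.
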
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 57b8d70089..075f4a4ebd 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -35,7 +35,7 @@ using namespace arm_compute;
 using namespace arm_compute::misc::shape_calculator;
 
 CLRNNLayer::CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
+    : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
       _is_prepared(false)
 {
 }
@@ -60,7 +60,7 @@ Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights
     ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info));
     ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f));
     ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
-    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&shape_info, &shape_info, info));
 
     return Status{};
 }
@@ -101,7 +101,7 @@ void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTen
     _fully_connected_out.allocator()->allocate();
     _gemm_output.allocator()->allocate();
 
-    _activation_kernel.configure(compile_context, &_add_output, hidden_state, info);
+    _activation.configure(compile_context, &_add_output, hidden_state, info);
     _add_output.allocator()->allocate();
 
     _copy_kernel.configure(compile_context, hidden_state, output);
@@ -116,7 +116,7 @@ void CLRNNLayer::run()
     _fully_connected_kernel.run();
     _gemm_state_f.run();
     CLScheduler::get().enqueue(_add_kernel);
-    CLScheduler::get().enqueue(_activation_kernel);
+    _activation.run();
 
     // copy hidden out to output
     CLScheduler::get().enqueue(_copy_kernel);
-- 
cgit v1.2.1
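
For existing users nothing changes at the function level: CLActivationLayer::configure() now simply records the tensors in its Impl and run() injects them into the operator. A minimal sketch of that unchanged, function-style usage follows, including the in-place case where output is nullptr (covered by the `_impl->dst = output == nullptr ? input : output;` line above); the shapes, includes and the default CL initialisation are assumptions made for the example:

    // Illustrative sketch only; not part of the patch.
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

    using namespace arm_compute;

    void run_inplace_activation()
    {
        CLScheduler::get().default_init();

        CLTensor src;
        src.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

        CLActivationLayer act;
        // Passing nullptr as the output requests an in-place activation: dst aliases src.
        act.configure(&src, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));

        src.allocator()->allocate();
        act.run(); // builds the ACL_SRC/ACL_DST maps internally and runs the operator
    }
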